From d64a942a76b240fd7529a708bac97a7d233258cb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 9 Aug 2021 12:32:26 +0200 Subject: [PATCH 001/162] fixed MappersTest --- .../test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c121dee2a..c431b4dd8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -726,8 +726,8 @@ public class MappersTest { final Dataset p = (Dataset) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("df76e73f-0483-49a4-a9bb-63f2f985574a", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("df76e73f-0483-49a4-a9bb-63f2f985574a"))); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); From 370dddb2fa8c27fbd95b618e22a2be9fcc8ebf40 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 7 Sep 2021 11:20:41 +0200 Subject: [PATCH 002/162] fix bug on oai iterator that skip record cleaned --- .../java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 75dd746ea..4b254c0ef 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -161,7 +161,7 @@ 
public class OaiIterator implements Iterator { report.put(e.getClass().getName(), e.getMessage()); final String cleaned = XmlCleaner.cleanAllEntities(xml); try { - doc = DocumentHelper.parseText(xml); + doc = DocumentHelper.parseText(cleaned); } catch (final DocumentException e1) { final String resumptionToken = extractResumptionToken(xml); if (resumptionToken == null) { From ec94cc9b939d4cfe92643d0c1d14afe52e403559 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 1 Oct 2021 09:41:27 +0200 Subject: [PATCH 003/162] IndexNotificationsJob test: persist contents on HDFS instead of passing them to ES --- .../dhp/broker/oa/IndexNotificationsJob.java | 29 ++----------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 80549e1ce..7b65c27ab 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -101,33 +101,10 @@ public class IndexNotificationsJob { ngEncoder) .flatMap((FlatMapFunction) g -> g.getData().iterator(), nEncoder); - final JavaRDD inputRdd = notifications + notifications .map((MapFunction) n -> prepareForIndexing(n, total), Encoders.STRING()) - .javaRDD(); - - final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); - - esCfg.put("es.index.auto.create", "false"); - esCfg.put("es.nodes", indexHost); - esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY - esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount); - esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait); - esCfg.put("es.batch.size.entries", esBatchSizeEntries); - esCfg.put("es.nodes.wan.only", esNodesWanOnly); - - log.info("*** 
Start indexing"); - JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg); - log.info("*** End indexing"); - - log.info("*** Deleting old notifications"); - final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** Deleted notifications: " + message); - - log.info("*** sendNotifications (emails, ...)"); - sendNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** ALL done."); - + .javaRDD() + .saveAsTextFile("/tmp/IndexNotificationsJob_test_6504"); } } From b01cd521b0eede2ad6ab8ab95e22f6a3dd14cae1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 1 Oct 2021 11:26:33 +0200 Subject: [PATCH 004/162] removed configuration specifying the limit to 8 for spark.dynamicAllocation.maxExecutors --- .../dhp/broker/oa/notifications_only/oozie_app/workflow.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml index 248326d57..70ab7d344 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml @@ -135,7 +135,6 @@ --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} - --conf spark.dynamicAllocation.maxExecutors="8" --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From 31a6ad1d79cea595d85a1ef9a15fc6ea714a97f3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Mon, 4 Oct 2021 12:01:56 +0200 Subject: [PATCH 005/162] optimization of verifySubsriptions() --- .../dhp/broker/oa/IndexNotificationsJob.java | 39 +++++---- 
.../broker/oa/IndexNotificationsJobTest.java | 84 +++++++++++++++++++ 2 files changed, 105 insertions(+), 18 deletions(-) create mode 100644 dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 7b65c27ab..c66e5f4f4 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -17,7 +17,6 @@ import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -25,7 +24,6 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.apache.spark.util.LongAccumulator; -import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,13 +44,14 @@ public class IndexNotificationsJob { private static final Logger log = LoggerFactory.getLogger(IndexNotificationsJob.class); + private static Map>> conditionsForSubscriptions = new HashMap<>(); + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString( - IndexNotificationsJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); + .toString(IndexNotificationsJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); 
parser.parseArgument(args); final SparkConf conf = new SparkConf(); @@ -89,6 +88,8 @@ public class IndexNotificationsJob { final List subscriptions = listSubscriptions(brokerApiBaseUrl); + initConditionsForSubscriptions(subscriptions); + log.info("Number of subscriptions: " + subscriptions.size()); if (subscriptions.size() > 0) { @@ -96,9 +97,7 @@ public class IndexNotificationsJob { final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils .readPath(spark, eventsPath, Event.class) - .map( - (MapFunction) e -> generateNotifications(e, subscriptions, startTime), - ngEncoder) + .map((MapFunction) e -> generateNotifications(e, subscriptions, startTime), ngEncoder) .flatMap((FlatMapFunction) g -> g.getData().iterator(), nEncoder); notifications @@ -108,20 +107,27 @@ public class IndexNotificationsJob { } } - private static NotificationGroup generateNotifications(final Event e, + protected static void initConditionsForSubscriptions(final List subscriptions) { + subscriptions.forEach(s -> conditionsForSubscriptions.put(s.getSubscriptionId(), s.conditionsAsMap())); + } + + protected static NotificationGroup generateNotifications(final Event e, final List subscriptions, final long date) { final List list = subscriptions .stream() - .filter( - s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) - .filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap())) + .filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) + .filter(s -> verifyConditions(e.getMap(), conditionsAsMap(s))) .map(s -> generateNotification(s, e, date)) .collect(Collectors.toList()); return new NotificationGroup(list); } + private static Map> conditionsAsMap(final Subscription s) { + return conditionsForSubscriptions.get(s.getSubscriptionId()); + } + private static Notification generateNotification(final Subscription s, final Event e, final long date) 
{ final Notification n = new Notification(); n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId())); @@ -145,18 +151,15 @@ public class IndexNotificationsJob { if (conditions.containsKey("trust") && !SubscriptionUtils - .verifyFloatRange( - map.getTrust(), conditions.get("trust").get(0).getValue(), - conditions.get("trust").get(0).getOtherValue())) { + .verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) { return false; } if (conditions.containsKey("targetDateofacceptance") && !conditions .get("targetDateofacceptance") .stream() - .anyMatch( - c -> SubscriptionUtils - .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { + .anyMatch(c -> SubscriptionUtils + .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java new file mode 100644 index 000000000..ea5dbd5d1 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java @@ -0,0 +1,84 @@ + +package eu.dnetlib.dhp.broker.oa; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.broker.model.Event; +import eu.dnetlib.dhp.broker.model.MappedFields; +import eu.dnetlib.dhp.broker.model.Subscription; +import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; + +class IndexNotificationsJobTest { + + private List subscriptions; + + @BeforeEach + void setUp() throws Exception { + final Subscription s = new Subscription(); + s.setTopic("ENRICH/MISSING/PID"); + 
s.setConditions("[{\"field\":\"targetDatasourceName\",\"fieldType\":\"STRING\",\"operator\":\"EXACT\",\"listParams\":[{\"value\":\"reposiTUm\"}]},{\"field\":\"trust\",\"fieldType\":\"FLOAT\",\"operator\":\"RANGE\",\"listParams\":[{\"value\":\"0\",\"otherValue\":\"1\"}]}]"); + subscriptions = Arrays.asList(s); + IndexNotificationsJob.initConditionsForSubscriptions(subscriptions); + } + + @Test + void testGenerateNotifications_invalid_topic() { + final Event event = new Event(); + event.setTopic("ENRICH/MISSING/PROJECT"); + + for (int i = 0; i < 10; i++) { + final long start = System.currentTimeMillis(); + final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + final long end = System.currentTimeMillis(); + + System.out.println("no topic - execution time (ms): " + (end - start)); + + assertEquals(0, res.getData().size()); + } + } + + @Test + void testGenerateNotifications_topic_match() { + final Event event = new Event(); + event.setTopic("ENRICH/MISSING/PID"); + event.setMap(new MappedFields()); + event.getMap().setTargetDatasourceName("reposiTUm"); + event.getMap().setTrust(0.8f); + + for (int i = 0; i < 10; i++) { + final long start = System.currentTimeMillis(); + final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + final long end = System.currentTimeMillis(); + + System.out.println("topic match - execution time (ms): " + (end - start)); + + assertEquals(1, res.getData().size()); + } + } + + @Test + void testGenerateNotifications_topic_no_match() { + final Event event = new Event(); + event.setTopic("ENRICH/MISSING/PID"); + event.setMap(new MappedFields()); + event.getMap().setTargetDatasourceName("Puma"); + event.getMap().setTrust(0.8f); + + for (int i = 0; i < 10; i++) { + final long start = System.currentTimeMillis(); + final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + final long end = System.currentTimeMillis(); + + 
System.out.println("topic no match - execution time (ms): " + (end - start)); + + assertEquals(0, res.getData().size()); + } + } + +} From 0a9ef34b56cdc1fb9783a0efb84dab29b03ebcbb Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Mon, 4 Oct 2021 15:46:12 +0200 Subject: [PATCH 006/162] test --- .../broker/oa/IndexNotificationsJobTest.java | 84 +++++++++++++------ 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java index ea5dbd5d1..6010da2c3 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java @@ -18,6 +18,8 @@ class IndexNotificationsJobTest { private List subscriptions; + private static final int N_TIMES = 1_000_000; + @BeforeEach void setUp() throws Exception { final Subscription s = new Subscription(); @@ -32,15 +34,8 @@ class IndexNotificationsJobTest { final Event event = new Event(); event.setTopic("ENRICH/MISSING/PROJECT"); - for (int i = 0; i < 10; i++) { - final long start = System.currentTimeMillis(); - final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); - final long end = System.currentTimeMillis(); - - System.out.println("no topic - execution time (ms): " + (end - start)); - - assertEquals(0, res.getData().size()); - } + final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + assertEquals(0, res.getData().size()); } @Test @@ -51,15 +46,8 @@ class IndexNotificationsJobTest { event.getMap().setTargetDatasourceName("reposiTUm"); event.getMap().setTrust(0.8f); - for (int i = 0; i < 10; i++) { - final long start = System.currentTimeMillis(); - final NotificationGroup res = 
IndexNotificationsJob.generateNotifications(event, subscriptions, 0); - final long end = System.currentTimeMillis(); - - System.out.println("topic match - execution time (ms): " + (end - start)); - - assertEquals(1, res.getData().size()); - } + final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + assertEquals(1, res.getData().size()); } @Test @@ -70,15 +58,63 @@ class IndexNotificationsJobTest { event.getMap().setTargetDatasourceName("Puma"); event.getMap().setTrust(0.8f); - for (int i = 0; i < 10; i++) { - final long start = System.currentTimeMillis(); - final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); - final long end = System.currentTimeMillis(); + final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + assertEquals(0, res.getData().size()); + } - System.out.println("topic no match - execution time (ms): " + (end - start)); + @Test + void testGenerateNotifications_invalid_topic_repeated() { + final Event event = new Event(); + event.setTopic("ENRICH/MISSING/PROJECT"); - assertEquals(0, res.getData().size()); + // warm up + IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + + final long start = System.currentTimeMillis(); + for (int i = 0; i < N_TIMES; i++) { + IndexNotificationsJob.generateNotifications(event, subscriptions, 0); } + final long end = System.currentTimeMillis(); + System.out.println(String.format("no topic - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); + + } + + @Test + void testGenerateNotifications_topic_match_repeated() { + final Event event = new Event(); + event.setTopic("ENRICH/MISSING/PID"); + event.setMap(new MappedFields()); + event.getMap().setTargetDatasourceName("reposiTUm"); + event.getMap().setTrust(0.8f); + + // warm up + IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + + final long start = System.currentTimeMillis(); + 
for (int i = 0; i < N_TIMES; i++) { + IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + } + final long end = System.currentTimeMillis(); + System.out.println(String.format("topic match - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); + } + + @Test + void testGenerateNotifications_topic_no_match_repeated() { + final Event event = new Event(); + event.setTopic("ENRICH/MISSING/PID"); + event.setMap(new MappedFields()); + event.getMap().setTargetDatasourceName("Puma"); + event.getMap().setTrust(0.8f); + + // warm up + IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + + final long start = System.currentTimeMillis(); + for (int i = 0; i < N_TIMES; i++) { + IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + } + final long end = System.currentTimeMillis(); + System.out.println(String.format("topic no match - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); } } From 8bbaa173354e99610053c5adfefd3f1f5c78f43f Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 5 Oct 2021 09:20:37 +0200 Subject: [PATCH 007/162] reimplemented of conditions cache as a non static variable --- .../dhp/broker/oa/IndexNotificationsJob.java | 42 +++++++++++-------- .../broker/oa/IndexNotificationsJobTest.java | 41 +++++++++++------- 2 files changed, 51 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index c66e5f4f4..5dc3315c4 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -44,14 +44,13 @@ public class IndexNotificationsJob { private static final Logger log = LoggerFactory.getLogger(IndexNotificationsJob.class); - private static 
Map>> conditionsForSubscriptions = new HashMap<>(); - public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(IndexNotificationsJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); + .toString( + IndexNotificationsJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); parser.parseArgument(args); final SparkConf conf = new SparkConf(); @@ -88,16 +87,19 @@ public class IndexNotificationsJob { final List subscriptions = listSubscriptions(brokerApiBaseUrl); - initConditionsForSubscriptions(subscriptions); - log.info("Number of subscriptions: " + subscriptions.size()); if (subscriptions.size() > 0) { + final Map>> conditionsMap = prepareConditionsMap(subscriptions); + final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils .readPath(spark, eventsPath, Event.class) - .map((MapFunction) e -> generateNotifications(e, subscriptions, startTime), ngEncoder) + .map( + (MapFunction) e -> generateNotifications( + e, subscriptions, conditionsMap, startTime), + ngEncoder) .flatMap((FlatMapFunction) g -> g.getData().iterator(), nEncoder); notifications @@ -107,27 +109,28 @@ public class IndexNotificationsJob { } } - protected static void initConditionsForSubscriptions(final List subscriptions) { - subscriptions.forEach(s -> conditionsForSubscriptions.put(s.getSubscriptionId(), s.conditionsAsMap())); + protected static Map>> prepareConditionsMap( + final List subscriptions) { + final Map>> map = new HashMap<>(); + subscriptions.forEach(s -> map.put(s.getSubscriptionId(), s.conditionsAsMap())); + return map; } protected static NotificationGroup generateNotifications(final Event e, final List subscriptions, + final Map>> conditionsMap, final long date) { final List list = subscriptions .stream() - 
.filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) - .filter(s -> verifyConditions(e.getMap(), conditionsAsMap(s))) + .filter( + s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) + .filter(s -> verifyConditions(e.getMap(), conditionsMap.get(s.getSubscriptionId()))) .map(s -> generateNotification(s, e, date)) .collect(Collectors.toList()); return new NotificationGroup(list); } - private static Map> conditionsAsMap(final Subscription s) { - return conditionsForSubscriptions.get(s.getSubscriptionId()); - } - private static Notification generateNotification(final Subscription s, final Event e, final long date) { final Notification n = new Notification(); n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId())); @@ -151,15 +154,18 @@ public class IndexNotificationsJob { if (conditions.containsKey("trust") && !SubscriptionUtils - .verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) { + .verifyFloatRange( + map.getTrust(), conditions.get("trust").get(0).getValue(), + conditions.get("trust").get(0).getOtherValue())) { return false; } if (conditions.containsKey("targetDateofacceptance") && !conditions .get("targetDateofacceptance") .stream() - .anyMatch(c -> SubscriptionUtils - .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { + .anyMatch( + c -> SubscriptionUtils + .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java index 6010da2c3..8ecb3061b 100644 --- 
a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java @@ -5,10 +5,12 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.util.Arrays; import java.util.List; +import java.util.Map; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import eu.dnetlib.dhp.broker.model.ConditionParams; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.MappedFields; import eu.dnetlib.dhp.broker.model.Subscription; @@ -18,15 +20,19 @@ class IndexNotificationsJobTest { private List subscriptions; + private Map>> conditionsMap; + private static final int N_TIMES = 1_000_000; @BeforeEach void setUp() throws Exception { final Subscription s = new Subscription(); s.setTopic("ENRICH/MISSING/PID"); - s.setConditions("[{\"field\":\"targetDatasourceName\",\"fieldType\":\"STRING\",\"operator\":\"EXACT\",\"listParams\":[{\"value\":\"reposiTUm\"}]},{\"field\":\"trust\",\"fieldType\":\"FLOAT\",\"operator\":\"RANGE\",\"listParams\":[{\"value\":\"0\",\"otherValue\":\"1\"}]}]"); + s + .setConditions( + "[{\"field\":\"targetDatasourceName\",\"fieldType\":\"STRING\",\"operator\":\"EXACT\",\"listParams\":[{\"value\":\"reposiTUm\"}]},{\"field\":\"trust\",\"fieldType\":\"FLOAT\",\"operator\":\"RANGE\",\"listParams\":[{\"value\":\"0\",\"otherValue\":\"1\"}]}]"); subscriptions = Arrays.asList(s); - IndexNotificationsJob.initConditionsForSubscriptions(subscriptions); + conditionsMap = IndexNotificationsJob.prepareConditionsMap(subscriptions); } @Test @@ -34,7 +40,8 @@ class IndexNotificationsJobTest { final Event event = new Event(); event.setTopic("ENRICH/MISSING/PROJECT"); - final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + final NotificationGroup res = IndexNotificationsJob + .generateNotifications(event, subscriptions, 
conditionsMap, 0); assertEquals(0, res.getData().size()); } @@ -46,7 +53,8 @@ class IndexNotificationsJobTest { event.getMap().setTargetDatasourceName("reposiTUm"); event.getMap().setTrust(0.8f); - final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + final NotificationGroup res = IndexNotificationsJob + .generateNotifications(event, subscriptions, conditionsMap, 0); assertEquals(1, res.getData().size()); } @@ -58,7 +66,8 @@ class IndexNotificationsJobTest { event.getMap().setTargetDatasourceName("Puma"); event.getMap().setTrust(0.8f); - final NotificationGroup res = IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + final NotificationGroup res = IndexNotificationsJob + .generateNotifications(event, subscriptions, conditionsMap, 0); assertEquals(0, res.getData().size()); } @@ -68,14 +77,15 @@ class IndexNotificationsJobTest { event.setTopic("ENRICH/MISSING/PROJECT"); // warm up - IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); final long start = System.currentTimeMillis(); for (int i = 0; i < N_TIMES; i++) { - IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); } final long end = System.currentTimeMillis(); - System.out.println(String.format("no topic - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); + System.out + .println(String.format("no topic - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); } @@ -88,14 +98,15 @@ class IndexNotificationsJobTest { event.getMap().setTrust(0.8f); // warm up - IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); final long start = System.currentTimeMillis(); for (int i = 0; i < N_TIMES; i++) { - 
IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); } final long end = System.currentTimeMillis(); - System.out.println(String.format("topic match - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); + System.out + .println(String.format("topic match - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); } @Test @@ -107,14 +118,16 @@ class IndexNotificationsJobTest { event.getMap().setTrust(0.8f); // warm up - IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); final long start = System.currentTimeMillis(); for (int i = 0; i < N_TIMES; i++) { - IndexNotificationsJob.generateNotifications(event, subscriptions, 0); + IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); } final long end = System.currentTimeMillis(); - System.out.println(String.format("topic no match - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); + System.out + .println( + String.format("topic no match - repeated %s times - execution time: %s ms ", N_TIMES, end - start)); } } From 69008e20c2e1c4e6af8f45c8ad4acd17bdf54fe7 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 5 Oct 2021 11:58:20 +0200 Subject: [PATCH 008/162] log and tests --- .../dhp/broker/oa/IndexNotificationsJob.java | 2 + .../oa/samples/SimpleVariableJobTest.java | 132 ++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/samples/SimpleVariableJobTest.java diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 5dc3315c4..fb2e26ba2 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -92,6 +92,8 @@ public class IndexNotificationsJob { if (subscriptions.size() > 0) { final Map>> conditionsMap = prepareConditionsMap(subscriptions); + log.info("ConditionsMap: " + new ObjectMapper().writeValueAsString(conditionsMap)); + final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/samples/SimpleVariableJobTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/samples/SimpleVariableJobTest.java new file mode 100644 index 000000000..a6d1c89d3 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/samples/SimpleVariableJobTest.java @@ -0,0 +1,132 @@ + +package eu.dnetlib.dhp.broker.oa.samples; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.broker.model.ConditionParams; +import 
eu.dnetlib.dhp.broker.model.MapCondition; +import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; + +@Disabled +public class SimpleVariableJobTest { + + private static final Logger log = LoggerFactory.getLogger(SimpleVariableJobTest.class); + + private static Path workingDir; + + private static SparkSession spark; + + private final static List inputList = new ArrayList<>(); + + private static final Map>> staticMap = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + + workingDir = Files.createTempDirectory(SimpleVariableJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + final SparkConf conf = new SparkConf(); + conf.setAppName(SimpleVariableJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + // conf.set("spark.sql.warehouse.dir", workingDir.toString()); + // conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(SimpleVariableJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + + for (int i = 0; i < 1_000_000; i++) { + inputList.add("record " + i); + } + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testSimpleVariableJob() throws Exception { + final Map>> map = fillMap(); + + final long n = spark + .createDataset(inputList, Encoders.STRING()) + .filter(s -> filter(map.get(s))) + .map((MapFunction) s -> s.toLowerCase(), Encoders.STRING()) + .count(); + + System.out.println(n); + } + + @Test + public void testSimpleVariableJob_static() throws Exception { + + staticMap.putAll(fillMap()); + + final long n = spark + .createDataset(inputList, Encoders.STRING()) + .filter(s -> filter(staticMap.get(s))) + .map((MapFunction) s -> s.toLowerCase(), 
Encoders.STRING()) + .count(); + + System.out.println(n); + } + + private static Map>> fillMap() + throws JsonParseException, JsonMappingException, IOException { + final String s = "[{\"field\":\"targetDatasourceName\",\"fieldType\":\"STRING\",\"operator\":\"EXACT\",\"listParams\":[{\"value\":\"reposiTUm\"}]},{\"field\":\"trust\",\"fieldType\":\"FLOAT\",\"operator\":\"RANGE\",\"listParams\":[{\"value\":\"0\",\"otherValue\":\"1\"}]}]"; + + final ObjectMapper mapper = new ObjectMapper(); + final List list = mapper + .readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, MapCondition.class)); + final Map> conditions = list + .stream() + .filter(mc -> !mc.getListParams().isEmpty()) + .collect(Collectors.toMap(MapCondition::getField, MapCondition::getListParams)); + + final Map>> map = new HashMap<>(); + inputList.forEach(i -> map.put(i, conditions)); + return map; + } + + private static boolean filter(final Map> conditions) { + if (conditions.containsKey("targetDatasourceName") + && !SubscriptionUtils + .verifyExact("reposiTUm", conditions.get("targetDatasourceName").get(0).getValue())) { + return false; + } + return true; + } + +} From 210d6c0e6d002d76ada343ad4f526cc60e70b41d Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 5 Oct 2021 13:57:46 +0200 Subject: [PATCH 009/162] generateNotificationsJob and indexNotificationsJob --- .../broker/oa/GenerateNotificationsJob.java | 184 ++++++++++++++++++ .../dhp/broker/oa/IndexNotificationsJob.java | 168 +++------------- .../oa/generate_all/oozie_app/workflow.xml | 24 +++ .../dhp/broker/oa/generate_notifications.json | 14 ++ .../notifications_only/oozie_app/workflow.xml | 27 ++- 5 files changed, 279 insertions(+), 138 deletions(-) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJob.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_notifications.json diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJob.java new file mode 100644 index 000000000..21c6c64a6 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJob.java @@ -0,0 +1,184 @@ + +package eu.dnetlib.dhp.broker.oa; + +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoder; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.util.LongAccumulator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.broker.model.ConditionParams; +import eu.dnetlib.dhp.broker.model.Event; +import eu.dnetlib.dhp.broker.model.MappedFields; +import eu.dnetlib.dhp.broker.model.Notification; +import eu.dnetlib.dhp.broker.model.Subscription; +import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; +import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; +import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; + +public class GenerateNotificationsJob { + + private static final Logger log = 
LoggerFactory.getLogger(GenerateNotificationsJob.class); + + public static void main(final String[] args) throws Exception { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString(GenerateNotificationsJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_notifications.json"))); + parser.parseArgument(args); + + final SparkConf conf = new SparkConf(); + + final String eventsPath = parser.get("outputDir") + "/events"; + log.info("eventsPath: {}", eventsPath); + + final String notificationsPath = parser.get("outputDir") + "/notifications"; + log.info("notificationsPath: {}", notificationsPath); + + final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl"); + log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl); + + final SparkSession spark = SparkSession.builder().config(conf).getOrCreate(); + + final LongAccumulator total = spark.sparkContext().longAccumulator("total_notifications"); + + final long startTime = new Date().getTime(); + + final List subscriptions = listSubscriptions(brokerApiBaseUrl); + + log.info("Number of subscriptions: " + subscriptions.size()); + + if (subscriptions.size() > 0) { + final Map>> conditionsMap = prepareConditionsMap(subscriptions); + + log.info("ConditionsMap: " + new ObjectMapper().writeValueAsString(conditionsMap)); + + final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); + final Encoder nEncoder = Encoders.bean(Notification.class); + final Dataset notifications = ClusterUtils + .readPath(spark, eventsPath, Event.class) + .map((MapFunction) e -> generateNotifications(e, subscriptions, conditionsMap, startTime), ngEncoder) + .flatMap((FlatMapFunction) g -> g.getData().iterator(), nEncoder); + + ClusterUtils.save(notifications, notificationsPath, Notification.class, total); + } + } + + protected static Map>> prepareConditionsMap( + final List subscriptions) { + final Map>> map = new HashMap<>(); + subscriptions.forEach(s -> map.put(s.getSubscriptionId(), 
s.conditionsAsMap())); + return map; + } + + protected static NotificationGroup generateNotifications(final Event e, + final List subscriptions, + final Map>> conditionsMap, + final long date) { + final List list = subscriptions + .stream() + .filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) + .filter(s -> verifyConditions(e.getMap(), conditionsMap.get(s.getSubscriptionId()))) + .map(s -> generateNotification(s, e, date)) + .collect(Collectors.toList()); + + return new NotificationGroup(list); + } + + private static Notification generateNotification(final Subscription s, final Event e, final long date) { + final Notification n = new Notification(); + n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId())); + n.setSubscriptionId(s.getSubscriptionId()); + n.setEventId(e.getEventId()); + n.setProducerId(e.getProducerId()); + n.setTopic(e.getTopic()); + n.setPayload(e.getPayload()); + n.setMap(e.getMap()); + n.setDate(date); + return n; + } + + private static boolean verifyConditions(final MappedFields map, + final Map> conditions) { + if (conditions.containsKey("targetDatasourceName") + && !SubscriptionUtils + .verifyExact(map.getTargetDatasourceName(), conditions.get("targetDatasourceName").get(0).getValue())) { + return false; + } + + if (conditions.containsKey("trust") + && !SubscriptionUtils + .verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) { + return false; + } + + if (conditions.containsKey("targetDateofacceptance") && !conditions + .get("targetDateofacceptance") + .stream() + .anyMatch(c -> SubscriptionUtils + .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { + return false; + } + + if (conditions.containsKey("targetResultTitle") + && !conditions + .get("targetResultTitle") + .stream() + .anyMatch(c -> 
SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { + return false; + } + + if (conditions.containsKey("targetAuthors") + && !conditions + .get("targetAuthors") + .stream() + .allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { + return false; + } + + return !conditions.containsKey("targetSubjects") + || conditions + .get("targetSubjects") + .stream() + .allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue())); + + } + + private static List listSubscriptions(final String brokerApiBaseUrl) throws Exception { + final String url = brokerApiBaseUrl + "/api/subscriptions"; + final HttpGet req = new HttpGet(url); + + final ObjectMapper mapper = new ObjectMapper(); + + try (final CloseableHttpClient client = HttpClients.createDefault()) { + try (final CloseableHttpResponse response = client.execute(req)) { + final String s = IOUtils.toString(response.getEntity().getContent()); + return mapper + .readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, Subscription.class)); + } + } + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index fb2e26ba2..55e82446f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -2,28 +2,22 @@ package eu.dnetlib.dhp.broker.oa; import java.io.IOException; -import java.util.Date; import java.util.HashMap; -import java.util.List; import java.util.Map; -import java.util.stream.Collectors; -import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.http.client.methods.CloseableHttpResponse; import 
org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.apache.spark.util.LongAccumulator; +import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,14 +25,8 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.broker.model.ConditionParams; -import eu.dnetlib.dhp.broker.model.Event; -import eu.dnetlib.dhp.broker.model.MappedFields; import eu.dnetlib.dhp.broker.model.Notification; -import eu.dnetlib.dhp.broker.model.Subscription; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; -import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; -import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; public class IndexNotificationsJob { @@ -48,15 +36,14 @@ public class IndexNotificationsJob { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString( - IndexNotificationsJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); + .toString(IndexNotificationsJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); parser.parseArgument(args); final SparkConf conf = new SparkConf(); - final String eventsPath = parser.get("outputDir") + "/events"; - log.info("eventsPath: {}", eventsPath); + final String notificationsPath = parser.get("outputDir") + "/notifications"; + 
log.info("notificationsPath: {}", notificationsPath); final String index = parser.get("index"); log.info("index: {}", index); @@ -83,131 +70,38 @@ public class IndexNotificationsJob { final LongAccumulator total = spark.sparkContext().longAccumulator("total_indexed"); - final long startTime = new Date().getTime(); + final Long date = ClusterUtils + .readPath(spark, notificationsPath, Notification.class) + .first() + .getDate(); - final List subscriptions = listSubscriptions(brokerApiBaseUrl); + final JavaRDD toIndexRdd = ClusterUtils + .readPath(spark, notificationsPath, Notification.class) + .map((MapFunction) n -> prepareForIndexing(n, total), Encoders.STRING()) + .javaRDD(); - log.info("Number of subscriptions: " + subscriptions.size()); + final Map esCfg = new HashMap<>(); - if (subscriptions.size() > 0) { - final Map>> conditionsMap = prepareConditionsMap(subscriptions); + esCfg.put("es.index.auto.create", "false"); + esCfg.put("es.nodes", indexHost); + esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY + esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount); + esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait); + esCfg.put("es.batch.size.entries", esBatchSizeEntries); + esCfg.put("es.nodes.wan.only", esNodesWanOnly); - log.info("ConditionsMap: " + new ObjectMapper().writeValueAsString(conditionsMap)); + log.info("*** Start indexing"); + JavaEsSpark.saveJsonToEs(toIndexRdd, index, esCfg); + log.info("*** End indexing"); - final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); - final Encoder nEncoder = Encoders.bean(Notification.class); - final Dataset notifications = ClusterUtils - .readPath(spark, eventsPath, Event.class) - .map( - (MapFunction) e -> generateNotifications( - e, subscriptions, conditionsMap, startTime), - ngEncoder) - .flatMap((FlatMapFunction) g -> g.getData().iterator(), nEncoder); + log.info("*** Deleting old notifications"); + final String message = deleteOldNotifications(brokerApiBaseUrl, 
date - 1000); + log.info("*** Deleted notifications: {}", message); - notifications - .map((MapFunction) n -> prepareForIndexing(n, total), Encoders.STRING()) - .javaRDD() - .saveAsTextFile("/tmp/IndexNotificationsJob_test_6504"); - } - } + log.info("*** sendNotifications (emails, ...)"); + sendNotifications(brokerApiBaseUrl, date - 1000); + log.info("*** ALL done."); - protected static Map>> prepareConditionsMap( - final List subscriptions) { - final Map>> map = new HashMap<>(); - subscriptions.forEach(s -> map.put(s.getSubscriptionId(), s.conditionsAsMap())); - return map; - } - - protected static NotificationGroup generateNotifications(final Event e, - final List subscriptions, - final Map>> conditionsMap, - final long date) { - final List list = subscriptions - .stream() - .filter( - s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) - .filter(s -> verifyConditions(e.getMap(), conditionsMap.get(s.getSubscriptionId()))) - .map(s -> generateNotification(s, e, date)) - .collect(Collectors.toList()); - - return new NotificationGroup(list); - } - - private static Notification generateNotification(final Subscription s, final Event e, final long date) { - final Notification n = new Notification(); - n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId())); - n.setSubscriptionId(s.getSubscriptionId()); - n.setEventId(e.getEventId()); - n.setProducerId(e.getProducerId()); - n.setTopic(e.getTopic()); - n.setPayload(e.getPayload()); - n.setMap(e.getMap()); - n.setDate(date); - return n; - } - - private static boolean verifyConditions(final MappedFields map, - final Map> conditions) { - if (conditions.containsKey("targetDatasourceName") - && !SubscriptionUtils - .verifyExact(map.getTargetDatasourceName(), conditions.get("targetDatasourceName").get(0).getValue())) { - return false; - } - - if (conditions.containsKey("trust") - && !SubscriptionUtils - .verifyFloatRange( - 
map.getTrust(), conditions.get("trust").get(0).getValue(), - conditions.get("trust").get(0).getOtherValue())) { - return false; - } - - if (conditions.containsKey("targetDateofacceptance") && !conditions - .get("targetDateofacceptance") - .stream() - .anyMatch( - c -> SubscriptionUtils - .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { - return false; - } - - if (conditions.containsKey("targetResultTitle") - && !conditions - .get("targetResultTitle") - .stream() - .anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { - return false; - } - - if (conditions.containsKey("targetAuthors") - && !conditions - .get("targetAuthors") - .stream() - .allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { - return false; - } - - return !conditions.containsKey("targetSubjects") - || conditions - .get("targetSubjects") - .stream() - .allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue())); - - } - - private static List listSubscriptions(final String brokerApiBaseUrl) throws Exception { - final String url = brokerApiBaseUrl + "/api/subscriptions"; - final HttpGet req = new HttpGet(url); - - final ObjectMapper mapper = new ObjectMapper(); - - try (final CloseableHttpClient client = HttpClients.createDefault()) { - try (final CloseableHttpResponse response = client.execute(req)) { - final String s = IOUtils.toString(response.getEntity().getContent()); - return mapper - .readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, Subscription.class)); - } - } } private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception { diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml index 
ea80c3acf..1a98a2513 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml @@ -542,6 +542,30 @@ --dbPassword${brokerDbPassword} --brokerApiBaseUrl${brokerApiBaseUrl} + + + + + + + yarn + cluster + GenerateNotificationsJob + eu.dnetlib.dhp.broker.oa.GenerateNotificationsJob + dhp-broker-events-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --outputDir${outputDir} + --brokerApiBaseUrl${brokerApiBaseUrl} + diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_notifications.json b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_notifications.json new file mode 100644 index 000000000..6e12783b9 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_notifications.json @@ -0,0 +1,14 @@ +[ + { + "paramName": "o", + "paramLongName": "outputDir", + "paramDescription": "the dir that contains the events folder", + "paramRequired": true + }, + { + "paramName": "broker", + "paramLongName": "brokerApiBaseUrl", + "paramDescription": "the url of the broker service api", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml index 
70ab7d344..d2d7b6d11 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml @@ -119,12 +119,36 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + yarn + cluster + GenerateNotificationsJob + eu.dnetlib.dhp.broker.oa.GenerateNotificationsJob + dhp-broker-events-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --outputDir${outputDir} + --brokerApiBaseUrl${brokerApiBaseUrl} + + + + + yarn @@ -135,6 +159,7 @@ --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} + --conf spark.dynamicAllocation.maxExecutors="8" --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From d6e1f224086380de758d6be2854a13b6fe8809b4 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 5 Oct 2021 15:09:18 +0200 Subject: [PATCH 010/162] max numbers of workers for indexing --- .../dhp/broker/oa/generate_all/oozie_app/workflow.xml | 9 +++++++-- .../broker/oa/notifications_only/oozie_app/workflow.xml | 7 ++++++- .../dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml | 7 ++++++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml 
b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml index 1a98a2513..bc6778f52 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml @@ -115,6 +115,11 @@ spark2EventLogDir spark 2.* event log dir location + + sparkMaxExecutorsForIndexing + 8 + Max number of workers for ElasticSearch indexing + @@ -498,7 +503,7 @@ --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} - --conf spark.dynamicAllocation.maxExecutors="8" + --conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} @@ -580,7 +585,7 @@ --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} - --conf spark.dynamicAllocation.maxExecutors="8" + --conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml index d2d7b6d11..0d226d78e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml @@ -98,6 +98,11 @@ spark2EventLogDir spark 2.* event log dir 
location + + sparkMaxExecutorsForIndexing + 8 + Max number of workers for ElasticSearch indexing + @@ -159,7 +164,7 @@ --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} - --conf spark.dynamicAllocation.maxExecutors="8" + --conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml index 9095004ad..87adfffaa 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml @@ -75,6 +75,11 @@ spark2EventLogDir spark 2.* event log dir location + + sparkMaxExecutorsForIndexing + 8 + Max number of workers for ElasticSearch indexing + @@ -112,7 +117,7 @@ --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} - --conf spark.dynamicAllocation.maxExecutors="8" + --conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From 2f61054cd198ce563fba2149b23c53aafbaf7029 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 11 Oct 2021 18:29:42 +0200 Subject: [PATCH 011/162] code formatting --- .../main/java/eu/dnetlib/dhp/PropagationConstant.java | 11 ++++++++--- .../SparkOrcidToResultFromSemRelJob.java | 7 +++++-- .../SparkResultToCommunityFromOrganizationJob.java | 4 ++-- 
.../SparkResultToCommunityThroughSemRelJob.java | 4 ++-- .../dhp/oa/provision/IndexRecordTransformerTest.java | 11 ++++++----- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 0d7c74475..23e97a97a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -69,7 +69,7 @@ public class PropagationConstant { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + ModelConstants.DNET_PROVENANCE_ACTIONS)); return nc; } @@ -84,7 +84,8 @@ public class PropagationConstant { return di; } - public static Qualifier getQualifier(String inference_class_id, String inference_class_name, String qualifierSchema) { + public static Qualifier getQualifier(String inference_class_id, String inference_class_name, + String qualifierSchema) { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); @@ -108,7 +109,11 @@ public class PropagationConstant { r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name, ModelConstants.DNET_PROVENANCE_ACTIONS)); + r + .setDataInfo( + getDataInfo( + inference_provenance, inference_class_id, inference_class_name, + ModelConstants.DNET_PROVENANCE_ACTIONS)); return r; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 68949b900..a38b4da2e 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -173,14 +173,17 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); + p + .setQualifier( + getQualifier( + ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); p .setDataInfo( getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + ModelConstants.DNET_PROVENANCE_ACTIONS)); Optional> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 1289ff644..50df08f8c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -10,7 +10,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -22,6 +21,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; 
import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -130,7 +130,7 @@ public class SparkResultToCommunityFromOrganizationJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + ModelConstants.DNET_PROVENANCE_ACTIONS))); propagatedContexts.add(newContext); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 7f76ead94..f31a26230 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -20,6 +19,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import scala.Tuple2; @@ -126,7 +126,7 @@ public class SparkResultToCommunityThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, - 
ModelConstants.DNET_PROVENANCE_ACTIONS))); + ModelConstants.DNET_PROVENANCE_ACTIONS))); return newContext; } return null; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 1c7dce3f2..64935e79d 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -84,13 +84,15 @@ public class IndexRecordTransformerTest { @Test public void testForEOSCFutureTraining() throws IOException, TransformerException { - final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); testRecordTransformation(record); } @Test public void testForEOSCFutureAirQualityCopernicus() throws IOException, TransformerException { - final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/air-quality-copernicus.xml")); + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/air-quality-copernicus.xml")); testRecordTransformation(record); } @@ -102,12 +104,11 @@ public class IndexRecordTransformerTest { @Test public void testForEOSCFutureB2SharePlotRelatedORP() throws IOException, TransformerException { - final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-related-orp.xml")); + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/b2share-plot-related-orp.xml")); testRecordTransformation(record); } - - private void testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = 
IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); From 8db39c86e2ad41f0feefa21e94d8700690884608 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 13 Oct 2021 11:34:24 +0200 Subject: [PATCH 012/162] added new parameter in the doiboost process workflow to specify a folder for the process of MAG dataset --- .../dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index eb82c3a7d..282c06f4e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -48,7 +48,11 @@ inputPathMAG - the MAG working path + the MAG input path + + + workingPathMAG + the MAG working path where to store the intermediate process results @@ -138,7 +142,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathMAG}/dataset - --workingPath${inputPathMAG}/process_p + --workingPath${workingPathMAG} --targetPath${workingPath} --masteryarn-cluster From 4b1920f008955e429ac20037fa66cb7676633b8f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 13 Oct 2021 15:33:50 +0200 Subject: [PATCH 013/162] changed the working path parameter value as dependant from the dnet-workflow working dir parameter --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 282c06f4e..29a12f4df 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -50,10 +50,6 @@ inputPathMAG the MAG input path - - workingPathMAG - the MAG working path where to store the intermediate process results - @@ -142,7 +138,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathMAG}/dataset - --workingPath${workingPathMAG} + --workingPath${workingPath}/MAG --targetPath${workingPath} --masteryarn-cluster From e15a1969a57726228f487e8b06b467ad5cdbef33 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 14 Oct 2021 14:33:56 +0200 Subject: [PATCH 014/162] applying fix on the DOIBoost construction process that somehow wasn't part of the merge done in 83c90c71809c947adcfc1e70eb9d9fad195d87aa --- .../eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index e501b4823..c08e09f53 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -206,9 +206,9 @@ object SparkGenerateDoiBoost { (r.getSource, r) else if (r.getTarget.startsWith("unresolved")) (r.getTarget,r) - else + else ("resolved", r) - }) + })(Encoders.tuple(Encoders.STRING, mapEncoderRel)) val openaireOrganization:Dataset[(String,String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x,y) => if (x != null) x else y ).map(_._2) @@ -221,8 
+221,8 @@ object SparkGenerateDoiBoost { currentRels.setSource(currentOrgs._1) else currentRels.setTarget(currentOrgs._1) - currentRels - }.write.save(s"$workingDirPath/doiBoostPublicationAffiliation") + currentRels + }.filter(r=> !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => { val affiliation = item._2 @@ -242,6 +242,6 @@ object SparkGenerateDoiBoost { else null }).filter(o=> o!=null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization") - } + } -} +} \ No newline at end of file From e471f12d5e3eebfddc7ece9b804ef998112bd48f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 19 Oct 2021 12:35:38 +0200 Subject: [PATCH 015/162] hotfix: recovered implementation removing the hardcoded working_dirs --- .../CreateRelatedEntitiesJob_phase1.java | 39 +++++++++---------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index a33a45517..48e5945c0 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -52,8 +52,11 @@ public class CreateRelatedEntitiesJob_phase1 { final String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase1.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"))); final 
ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -75,6 +78,7 @@ public class CreateRelatedEntitiesJob_phase1 { final String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") final Class entityClazz = (Class) Class.forName(graphTableClassName); final SparkConf conf = new SparkConf(); @@ -101,22 +105,12 @@ public class CreateRelatedEntitiesJob_phase1 { Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class))) .cache(); - readPathEntity(spark, inputEntityPath, clazz) + final Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) .filter("dataInfo.invisible == false") .map( (MapFunction>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)), - Encoders - .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) - .write() - .mode(SaveMode.Overwrite) - .save("/tmp/beta_provision/working_dir/update_solr/join_partial/relatedEntities/" + clazz.getSimpleName()); - - final Dataset> entities = spark - .read() - .load("/tmp/beta_provision/working_dir/update_solr/join_partial/relatedEntities/" + clazz.getSimpleName()) - .as( - Encoders - .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))); + Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) + .cache(); relsByTarget .joinWith(entities, entities.col("_1").equalTo(relsByTarget.col("_1")), "inner") @@ -149,8 +143,10 @@ public class CreateRelatedEntitiesJob_phase1 { re.setId(entity.getId()); re.setType(EntityType.fromClass(clazz).name()); - if (entity.getPid() != null) + // TODO move the max number of PIDs to eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits + if (Objects.nonNull(entity.getPid())) { re.setPid(entity.getPid().stream().limit(400).collect(Collectors.toList())); + } re.setCollectedfrom(entity.getCollectedfrom()); switch (EntityType.fromClass(clazz)) { @@ -212,7 +208,7 @@ public class 
CreateRelatedEntitiesJob_phase1 { final List> f = p.getFundingtree(); if (!f.isEmpty()) { - re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); + re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList())); } break; } @@ -227,15 +223,16 @@ public class CreateRelatedEntitiesJob_phase1 { return Optional .ofNullable(f) .filter(Objects::nonNull) - .map(x -> x.getValue()) + .map(Field::getValue) .orElse(defaultValue); } /** - * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file, + * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text + * file * - * @param spark - * @param relationPath + * @param spark the SparkSession + * @param relationPath the path storing the relation objects * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( From bdffa86c2f5cb8571520c5bfd79c35e56a6950e0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 19 Oct 2021 15:39:38 +0200 Subject: [PATCH 016/162] undo last commit --- .../dhp/broker/oa/IndexNotificationsJob.java | 75 +++++++++---------- .../notifications_only/oozie_app/workflow.xml | 1 - 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index e8ef5dd3e..fb2e26ba2 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -17,7 +17,6 @@ import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; -import 
org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -25,7 +24,6 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.apache.spark.util.LongAccumulator; -import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,7 +31,11 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.broker.model.*; +import eu.dnetlib.dhp.broker.model.ConditionParams; +import eu.dnetlib.dhp.broker.model.Event; +import eu.dnetlib.dhp.broker.model.MappedFields; +import eu.dnetlib.dhp.broker.model.Notification; +import eu.dnetlib.dhp.broker.model.Subscription; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; @@ -85,55 +87,46 @@ public class IndexNotificationsJob { final List subscriptions = listSubscriptions(brokerApiBaseUrl); - log.info("Number of subscriptions: {}", subscriptions.size()); + log.info("Number of subscriptions: " + subscriptions.size()); + + if (subscriptions.size() > 0) { + final Map>> conditionsMap = prepareConditionsMap(subscriptions); + + log.info("ConditionsMap: " + new ObjectMapper().writeValueAsString(conditionsMap)); - if (!subscriptions.isEmpty()) { final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils .readPath(spark, eventsPath, Event.class) .map( - (MapFunction) e -> generateNotifications(e, subscriptions, startTime), + (MapFunction) e -> generateNotifications( + e, subscriptions, conditionsMap, startTime), ngEncoder) 
.flatMap((FlatMapFunction) g -> g.getData().iterator(), nEncoder); - final JavaRDD inputRdd = notifications + notifications .map((MapFunction) n -> prepareForIndexing(n, total), Encoders.STRING()) - .javaRDD(); - - final Map esCfg = new HashMap<>(); - - esCfg.put("es.index.auto.create", "false"); - esCfg.put("es.nodes", indexHost); - esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY - esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount); - esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait); - esCfg.put("es.batch.size.entries", esBatchSizeEntries); - esCfg.put("es.nodes.wan.only", esNodesWanOnly); - - log.info("*** Start indexing"); - JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg); - log.info("*** End indexing"); - - log.info("*** Deleting old notifications"); - final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** Deleted notifications: {}", message); - - log.info("*** sendNotifications (emails, ...)"); - sendNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** ALL done."); - + .javaRDD() + .saveAsTextFile("/tmp/IndexNotificationsJob_test_6504"); } } - private static NotificationGroup generateNotifications(final Event e, + protected static Map>> prepareConditionsMap( + final List subscriptions) { + final Map>> map = new HashMap<>(); + subscriptions.forEach(s -> map.put(s.getSubscriptionId(), s.conditionsAsMap())); + return map; + } + + protected static NotificationGroup generateNotifications(final Event e, final List subscriptions, + final Map>> conditionsMap, final long date) { final List list = subscriptions .stream() .filter( s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) - .filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap())) + .filter(s -> verifyConditions(e.getMap(), conditionsMap.get(s.getSubscriptionId()))) .map(s -> generateNotification(s, e, date)) .collect(Collectors.toList()); @@ -169,28 
+162,28 @@ public class IndexNotificationsJob { return false; } - if (conditions.containsKey("targetDateofacceptance") && conditions + if (conditions.containsKey("targetDateofacceptance") && !conditions .get("targetDateofacceptance") .stream() - .noneMatch( + .anyMatch( c -> SubscriptionUtils .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } if (conditions.containsKey("targetResultTitle") - && conditions + && !conditions .get("targetResultTitle") .stream() - .noneMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { + .anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { return false; } if (conditions.containsKey("targetAuthors") - && conditions + && !conditions .get("targetAuthors") .stream() - .noneMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { + .allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { return false; } @@ -202,7 +195,7 @@ public class IndexNotificationsJob { } - private static List listSubscriptions(final String brokerApiBaseUrl) throws IOException { + private static List listSubscriptions(final String brokerApiBaseUrl) throws Exception { final String url = brokerApiBaseUrl + "/api/subscriptions"; final HttpGet req = new HttpGet(url); @@ -217,7 +210,7 @@ public class IndexNotificationsJob { } } - private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws IOException { + private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception { final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml 
b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml index 248326d57..70ab7d344 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml @@ -135,7 +135,6 @@ --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} - --conf spark.dynamicAllocation.maxExecutors="8" --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From c4fce785ab623fcce1afaaecbbcefd45dbcf175f Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Tue, 19 Oct 2021 16:18:26 +0200 Subject: [PATCH 017/162] fixed a compilation problem of a unit test --- .../broker/oa/GenerateNotificationsJob.java | 22 +++++++++++++------ .../dhp/broker/oa/IndexNotificationsJob.java | 5 +++-- ...java => GenerateNotificationsJobTest.java} | 22 +++++++++---------- 3 files changed, 29 insertions(+), 20 deletions(-) rename dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/{IndexNotificationsJobTest.java => GenerateNotificationsJobTest.java} (81%) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJob.java index 21c6c64a6..6b8d60f40 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJob.java @@ -45,8 +45,9 @@ public class GenerateNotificationsJob { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - 
.toString(GenerateNotificationsJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_notifications.json"))); + .toString( + GenerateNotificationsJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_notifications.json"))); parser.parseArgument(args); final SparkConf conf = new SparkConf(); @@ -79,7 +80,10 @@ public class GenerateNotificationsJob { final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils .readPath(spark, eventsPath, Event.class) - .map((MapFunction) e -> generateNotifications(e, subscriptions, conditionsMap, startTime), ngEncoder) + .map( + (MapFunction) e -> generateNotifications( + e, subscriptions, conditionsMap, startTime), + ngEncoder) .flatMap((FlatMapFunction) g -> g.getData().iterator(), nEncoder); ClusterUtils.save(notifications, notificationsPath, Notification.class, total); @@ -99,7 +103,8 @@ public class GenerateNotificationsJob { final long date) { final List list = subscriptions .stream() - .filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) + .filter( + s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) .filter(s -> verifyConditions(e.getMap(), conditionsMap.get(s.getSubscriptionId()))) .map(s -> generateNotification(s, e, date)) .collect(Collectors.toList()); @@ -130,15 +135,18 @@ public class GenerateNotificationsJob { if (conditions.containsKey("trust") && !SubscriptionUtils - .verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) { + .verifyFloatRange( + map.getTrust(), conditions.get("trust").get(0).getValue(), + conditions.get("trust").get(0).getOtherValue())) { return false; } if (conditions.containsKey("targetDateofacceptance") && !conditions .get("targetDateofacceptance") .stream() - .anyMatch(c -> SubscriptionUtils - .verifyDateRange(map.getTargetDateofacceptance(), 
c.getValue(), c.getOtherValue()))) { + .anyMatch( + c -> SubscriptionUtils + .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 55e82446f..a2aa30092 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -36,8 +36,9 @@ public class IndexNotificationsJob { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(IndexNotificationsJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); + .toString( + IndexNotificationsJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); parser.parseArgument(args); final SparkConf conf = new SparkConf(); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJobTest.java similarity index 81% rename from dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java rename to dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJobTest.java index 8ecb3061b..233963e2f 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJobTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/GenerateNotificationsJobTest.java @@ -16,7 +16,7 @@ import eu.dnetlib.dhp.broker.model.MappedFields; import eu.dnetlib.dhp.broker.model.Subscription; import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; -class 
IndexNotificationsJobTest { +class GenerateNotificationsJobTest { private List subscriptions; @@ -32,7 +32,7 @@ class IndexNotificationsJobTest { .setConditions( "[{\"field\":\"targetDatasourceName\",\"fieldType\":\"STRING\",\"operator\":\"EXACT\",\"listParams\":[{\"value\":\"reposiTUm\"}]},{\"field\":\"trust\",\"fieldType\":\"FLOAT\",\"operator\":\"RANGE\",\"listParams\":[{\"value\":\"0\",\"otherValue\":\"1\"}]}]"); subscriptions = Arrays.asList(s); - conditionsMap = IndexNotificationsJob.prepareConditionsMap(subscriptions); + conditionsMap = GenerateNotificationsJob.prepareConditionsMap(subscriptions); } @Test @@ -40,7 +40,7 @@ class IndexNotificationsJobTest { final Event event = new Event(); event.setTopic("ENRICH/MISSING/PROJECT"); - final NotificationGroup res = IndexNotificationsJob + final NotificationGroup res = GenerateNotificationsJob .generateNotifications(event, subscriptions, conditionsMap, 0); assertEquals(0, res.getData().size()); } @@ -53,7 +53,7 @@ class IndexNotificationsJobTest { event.getMap().setTargetDatasourceName("reposiTUm"); event.getMap().setTrust(0.8f); - final NotificationGroup res = IndexNotificationsJob + final NotificationGroup res = GenerateNotificationsJob .generateNotifications(event, subscriptions, conditionsMap, 0); assertEquals(1, res.getData().size()); } @@ -66,7 +66,7 @@ class IndexNotificationsJobTest { event.getMap().setTargetDatasourceName("Puma"); event.getMap().setTrust(0.8f); - final NotificationGroup res = IndexNotificationsJob + final NotificationGroup res = GenerateNotificationsJob .generateNotifications(event, subscriptions, conditionsMap, 0); assertEquals(0, res.getData().size()); } @@ -77,11 +77,11 @@ class IndexNotificationsJobTest { event.setTopic("ENRICH/MISSING/PROJECT"); // warm up - IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); + GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); final long start = System.currentTimeMillis(); for 
(int i = 0; i < N_TIMES; i++) { - IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); + GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); } final long end = System.currentTimeMillis(); System.out @@ -98,11 +98,11 @@ class IndexNotificationsJobTest { event.getMap().setTrust(0.8f); // warm up - IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); + GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); final long start = System.currentTimeMillis(); for (int i = 0; i < N_TIMES; i++) { - IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); + GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); } final long end = System.currentTimeMillis(); System.out @@ -118,11 +118,11 @@ class IndexNotificationsJobTest { event.getMap().setTrust(0.8f); // warm up - IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); + GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); final long start = System.currentTimeMillis(); for (int i = 0; i < N_TIMES; i++) { - IndexNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); + GenerateNotificationsJob.generateNotifications(event, subscriptions, conditionsMap, 0); } final long end = System.currentTimeMillis(); System.out From 43f4eb492b1dc8c747fdf8dd90d22e6d2f98dcb1 Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 14 Oct 2021 18:53:35 +0300 Subject: [PATCH 018/162] fetching affiliated results for 4 orgs in monitor. 
fixed affiliated orgs in stats db --- .../oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 2 +- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 9ea50d488..d39af37d5 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -17,7 +17,7 @@ create table TARGET.result as union all select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) union all - select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on p.id=rp.project join SOURCE.project_organizations po on po.id=p.id where rp.id=r.id and po.organization in ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( 'openorgs____::759d59f05d77188faee99b7493b46805', 'openorgs____::b84450f9864182c67b8611b5593f4250', 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index b3cbc9b41..d461bd343 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -126,7 +126,7 @@ FROM 
${stats_db_name}.otherresearchproduct_topics; CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r -WHERE r.reltype = 'resultOrganization' +WHERE r.reltype = 'resultOrganization' and r.relclass='hasAuthorInstitution' and r.subreltype='affiliation' and r.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.result_projects AS From a7376907c2bc02b363912392cb06fcb177c53a92 Mon Sep 17 00:00:00 2001 From: antleb Date: Fri, 15 Oct 2021 13:42:55 +0300 Subject: [PATCH 019/162] invalidating medatadata before context thingies --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh index 6d42ab13d..2a40eb920 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh @@ -26,6 +26,8 @@ hdfs dfs -copyFromLocal concepts.csv ${TMP} hdfs dfs -chmod -R 777 ${TMP} echo "Creating and populating impala tables" +impala-shell -q "invalidate metadata" +impala-shell -d ${TARGET_DB} -q "invalidate metadata" impala-shell -q "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','" impala-shell -q "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','" impala-shell -q "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','" From cece432adc0b4c929e768a94359d024f7e59fbef Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 20 Oct 2021 14:14:53 +0200 Subject: [PATCH 020/162] 
[stats] reducing the step22 wait time --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh index dc19f84b4..03aa535e1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash curl --request GET $1/cache/updateCache -sleep 20h \ No newline at end of file +sleep 6h \ No newline at end of file From d66e20e7acb037a92b84dcf6d8b6f4a82206663c Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 21 Oct 2021 15:51:48 +0200 Subject: [PATCH 021/162] added hierarchy rel in ROR actionset --- .../ror/GenerateRorActionSetJob.java | 61 +++-- .../ror/GenerateRorActionSetJobTest.java | 39 +++- .../dhp/actionmanager/ror/ror_org.json | 215 ++++++++---------- 3 files changed, 173 insertions(+), 142 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index 869e1cb68..e4d458780 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.actionmanager.ror; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE; import 
static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; @@ -29,8 +30,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; @@ -38,8 +38,8 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.SparkAtomicActionJob; import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType; +import eu.dnetlib.dhp.actionmanager.ror.model.Relationship; import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; @@ -48,8 +48,10 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; @@ -112,24 +114,21 @@ public class GenerateRorActionSetJob { final String outputPath) throws IOException { readInputPath(spark, inputPath) - .map( - (MapFunction) GenerateRorActionSetJob::convertRorOrg, - Encoders.bean(Organization.class)) - .toJavaRDD() - .map(o -> new AtomicAction<>(Organization.class, o)) + .map(GenerateRorActionSetJob::convertRorOrg) + .flatMap(List::iterator) .mapToPair( aa -> new Tuple2<>(new 
Text(aa.getClazz().getCanonicalName()), new Text(OBJECT_MAPPER.writeValueAsString(aa)))) .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); } - protected static Organization convertRorOrg(final RorOrganization r) { + protected static List> convertRorOrg(final RorOrganization r) { final Date now = new Date(); final Organization o = new Organization(); - o.setId(String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(r.getId()))); + o.setId(calculateOpenaireId(r.getId())); o.setOriginalId(Arrays.asList(String.format("%s::%s", ROR_NS_PREFIX, r.getId()))); o.setCollectedfrom(ROR_COLLECTED_FROM); o.setPid(pids(r)); @@ -166,7 +165,43 @@ public class GenerateRorActionSetJob { o.setDataInfo(ROR_DATA_INFO); o.setLastupdatetimestamp(now.getTime()); - return o; + final List> res = new ArrayList<>(); + res.add(new AtomicAction<>(Organization.class, o)); + + for (final Relationship rorRel : r.getRelationships()) { + if (rorRel.getType().equalsIgnoreCase("parent")) { + final String orgId1 = calculateOpenaireId(r.getId()); + final String orgId2 = calculateOpenaireId(rorRel.getId()); + res + .add( + new AtomicAction<>(Relation.class, + calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF))); + res + .add( + new AtomicAction<>(Relation.class, + calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF))); + } + } + + return res; + + } + + private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) { + final Relation rel = new Relation(); + rel.setSource(source); + rel.setTarget(target); + rel.setRelType(ORG_ORG_RELTYPE); + rel.setSubRelType(ModelConstants.RELATIONSHIP); + rel.setRelClass(relClass); + rel.setCollectedfrom(ROR_COLLECTED_FROM); + rel.setDataInfo(ROR_DATA_INFO); + rel.setLastupdatetimestamp(System.currentTimeMillis()); + return rel; + } + + private static String calculateOpenaireId(final String rorId) { + return String.format("20|%s::%s", ROR_NS_PREFIX, 
DHPUtils.md5(rorId)); } private static List pids(final RorOrganization r) { @@ -202,14 +237,14 @@ public class GenerateRorActionSetJob { .collect(Collectors.toList()); } - private static Dataset readInputPath( + private static JavaRDD readInputPath( final SparkSession spark, final String path) throws IOException { try (final FileSystem fileSystem = FileSystem.get(new Configuration()); final InputStream is = fileSystem.open(new Path(path))) { final RorOrganization[] arr = OBJECT_MAPPER.readValue(is, RorOrganization[].class); - return spark.createDataset(Arrays.asList(arr), Encoders.bean(RorOrganization.class)); + return spark.createDataset(Arrays.asList(arr), Encoders.bean(RorOrganization.class)).toJavaRDD(); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java index aa11f4ab5..d50c1d5f3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java @@ -1,7 +1,10 @@ package eu.dnetlib.dhp.actionmanager.ror; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.FileInputStream; +import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -13,9 +16,12 @@ import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Relation; -@Disabled class GenerateRorActionSetJobTest { private static final ObjectMapper mapper = new ObjectMapper(); 
@@ -30,21 +36,40 @@ class GenerateRorActionSetJobTest { void testConvertRorOrg() throws Exception { final RorOrganization r = mapper .readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class); - final Organization org = GenerateRorActionSetJob.convertRorOrg(r); + final List> aas = GenerateRorActionSetJob.convertRorOrg(r); + + Assertions.assertEquals(3, aas.size()); + assertEquals(Organization.class, aas.get(0).getClazz()); + assertEquals(Relation.class, aas.get(1).getClazz()); + assertEquals(Relation.class, aas.get(2).getClazz()); + + final Organization o = (Organization) aas.get(0).getPayload(); + final Relation r1 = (Relation) aas.get(1).getPayload(); + final Relation r2 = (Relation) aas.get(2).getPayload(); + + assertEquals(o.getId(), r1.getSource()); + assertEquals(r1.getSource(), r2.getTarget()); + assertEquals(r2.getSource(), r1.getTarget()); + assertEquals(ModelConstants.IS_PARENT_OF, r1.getRelClass()); + assertEquals(ModelConstants.IS_CHILD_OF, r2.getRelClass()); + + System.out.println(mapper.writeValueAsString(o)); + System.out.println(mapper.writeValueAsString(r1)); + System.out.println(mapper.writeValueAsString(r2)); - final String s = mapper.writeValueAsString(org); - Assertions.assertTrue(StringUtils.isNotBlank(s)); - System.out.println(s); } @Test + @Disabled void testConvertAllRorOrg() throws Exception { final RorOrganization[] arr = mapper .readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class); for (final RorOrganization r : arr) { - Organization o = GenerateRorActionSetJob.convertRorOrg(r); - Assertions.assertNotNull(o); + final List> aas = GenerateRorActionSetJob.convertRorOrg(r); + Assertions.assertFalse(aas.isEmpty()); + Assertions.assertNotNull(aas.get(0)); + final Organization o = (Organization) aas.get(0).getPayload(); Assertions.assertTrue(StringUtils.isNotBlank(o.getId())); } } diff --git 
a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/ror/ror_org.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/ror/ror_org.json index d2b4fa64b..2bd79d06d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/ror/ror_org.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/ror/ror_org.json @@ -1,123 +1,94 @@ -{ - "ip_addresses": [], - "aliases": [], - "acronyms": [ - "ANU" - ], - "links": [ - "http://www.anu.edu.au/" - ], - "country": { - "country_code": "AU", - "country_name": "Australia" - }, - "name": "Australian National University", - "wikipedia_url": "http://en.wikipedia.org/wiki/Australian_National_University", - "addresses": [ - { - "lat": -35.2778, - "state_code": "AU-ACT", - "country_geonames_id": 2077456, - "lng": 149.1205, - "state": "Australian Capital Territory", - "city": "Canberra", - "geonames_city": { - "nuts_level2": { - "name": null, - "code": null - }, - "geonames_admin2": { - "ascii_name": null, - "id": null, - "name": null, - "code": null - }, - "geonames_admin1": { - "ascii_name": "ACT", - "id": 2177478, - "name": "ACT", - "code": "AU.01" - }, - "city": "Canberra", - "id": 2172517, - "nuts_level1": { - "name": null, - "code": null - }, - "nuts_level3": { - "name": null, - "code": null - }, - "license": { - "attribution": "Data from geonames.org under a CC-BY 3.0 license", - "license": "http://creativecommons.org/licenses/by/3.0/" - } - }, - "postcode": null, - "primary": false, - "line": null - } - ], - "types": [ - "Education" - ], - "established": 1946, - "relationships": [ - { - "type": "Related", - "id": "https://ror.org/041c7s516", - "label": "Calvary Hospital" - }, - { - "type": "Related", - "id": "https://ror.org/04h7nbn38", - "label": "Canberra Hospital" - }, - { - "type": "Related", - "id": "https://ror.org/030jpqj15", - "label": "Goulburn Base Hospital" - }, - { - "type": "Child", - "id": 
"https://ror.org/006a4jj40", - "label": "Mount Stromlo Observatory" - } - ], - "email_address": null, - "external_ids": { - "Wikidata": { - "all": [ - "Q127990" - ], - "preferred": null - }, - "OrgRef": { - "all": [ - "285106" - ], - "preferred": null - }, - "ISNI": { - "all": [ - "0000 0001 2180 7477" - ], - "preferred": null - }, - "FundRef": { - "all": [ - "501100000995", - "501100001151", - "100009020" - ], - "preferred": "501100000995" - }, - "GRID": { - "all": "grid.1001.0", - "preferred": "grid.1001.0" - } - }, - "id": "https://ror.org/019wvm592", - "labels": [], - "status": "active" + { + "ip_addresses": [], + "aliases": [], + "acronyms": [ + "MSO" + ], + "links": [ + "https://rsaa.anu.edu.au/observatories/mount-stromlo-observatory" + ], + "country": { + "country_code": "AU", + "country_name": "Australia" + }, + "name": "Mount Stromlo Observatory", + "wikipedia_url": "https://en.wikipedia.org/wiki/Mount_Stromlo_Observatory", + "addresses": [ + { + "lat": -35.320278, + "state_code": "AU-ACT", + "country_geonames_id": 2077456, + "lng": 149.006944, + "state": "Australian Capital Territory", + "city": "Canberra", + "geonames_city": { + "nuts_level2": { + "name": null, + "code": null + }, + "geonames_admin2": { + "ascii_name": null, + "id": null, + "name": null, + "code": null + }, + "geonames_admin1": { + "ascii_name": "ACT", + "id": 2177478, + "name": "ACT", + "code": "AU.01" + }, + "city": "Canberra", + "id": 2172517, + "nuts_level1": { + "name": null, + "code": null + }, + "nuts_level3": { + "name": null, + "code": null + }, + "license": { + "attribution": "Data from geonames.org under a CC-BY 3.0 license", + "license": "http://creativecommons.org/licenses/by/3.0/" + } + }, + "postcode": null, + "primary": false, + "line": null + } + ], + "types": [ + "Education" + ], + "established": 1924, + "relationships": [ + { + "type": "Parent", + "id": "https://ror.org/019wvm592", + "label": "Australian National University" + } + ], + "email_address": null, + 
"external_ids": { + "ISNI": { + "all": [ + "0000 0004 0459 2816" + ], + "preferred": null + }, + "Wikidata": { + "all": [ + "Q1310548" + ], + "preferred": null + }, + "GRID": { + "all": "grid.440325.4", + "preferred": "grid.440325.4" + } + }, + "id": "https://ror.org/006a4jj40", + "labels": [], + "status": "active" } \ No newline at end of file From b97b78f874ee744e87f1bc4d157cbeb6dfb569a9 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Mon, 1 Nov 2021 15:42:29 +0200 Subject: [PATCH 022/162] removed hardcoded reference --- .../dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql index b977302df..e892da0be 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql @@ -65,7 +65,7 @@ FROM ${stats_db_name}.project_tmp p UPDATE ${stats_db_name}.publication_tmp SET delayed = 'yes' WHERE publication_tmp.id IN (SELECT distinct r.id - FROM stats_wf_db_obs.result r, + FROM ${stats_db_name}.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id = pr.result @@ -75,7 +75,7 @@ WHERE publication_tmp.id IN (SELECT distinct r.id UPDATE ${stats_db_name}.dataset_tmp SET delayed = 'yes' WHERE dataset_tmp.id IN (SELECT distinct r.id - FROM stats_wf_db_obs.result r, + FROM ${stats_db_name}.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id = pr.result From b9d4f115ccf8d6bb3d2b033a3ae351be2f575932 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 9 Nov 2021 12:04:45 +0100 Subject: [PATCH 023/162] fixed Crossref mappign for SFI projects --- 
.../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 25f0ff381..91d4bd4d3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -358,7 +358,7 @@ case object Crossref2Oaf { case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a) case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a) case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a) - case "10.13039/501100001602" => generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) + case "10.13039/501100001602" => generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", "")) case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a) case "10.13039/501100000038"=> val targetId = getProjectId("nserc_______" , "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) From 6cd91004e33c0eda2fa6f1381a1a65cd21f8e40f Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 9 Nov 2021 12:22:57 +0100 Subject: [PATCH 024/162] fixed DOI for Wellcome Trust in mapping relationships from Crossref --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 91d4bd4d3..1404469ef 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -382,8 +382,8 @@ case object Crossref2Oaf { case "10.13039/501100006769"=> generateSimpleRelationFromAward(funder, "rsf_________", a=>a) case "10.13039/501100001711"=> generateSimpleRelationFromAward(funder, "snsf________", snsfRule) case "10.13039/501100004410"=> generateSimpleRelationFromAward(funder, "tubitakf____", a =>a) - case "10.10.13039/100004440"=> generateSimpleRelationFromAward(funder, "wt__________", a =>a) - case "10.13039/100004440"=> val targetId = getProjectId("wt__________" , "1e5e62235d094afd01cd56e65112fc63") + case "10.13039/100004440" => generateSimpleRelationFromAward(funder, "wt__________", a =>a) + val targetId = getProjectId("wt__________" , "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId,targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId,sourceId, ModelConstants.PRODUCES) From fc8fceaac3c5c4f70289ed42735d9d1565f7c385 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 10 Nov 2021 14:11:52 +0100 Subject: [PATCH 025/162] create direct link to WT projects as well --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 1404469ef..0ce55a96b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -402,7 +402,8 @@ case object Crossref2Oaf { case "The French National Research Agency (ANR)" | "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a) case "CONICYT, 
Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) - case "Wellcome Trust Masters Fellowship" => val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") + case "Wellcome Trust Masters Fellowship" => generateSimpleRelationFromAward(funder, "wt__________", a =>a) + val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY ) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES ) case _ => logger.debug("no match for "+funder.name ) From 3f9b2ba8ce7d987e3bb65d1d76bc517636362814 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 22 Nov 2021 16:59:43 +0100 Subject: [PATCH 026/162] [Hosted By Map] fix issue in test --- .../java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index 1bdcb60aa..4613d5636 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -54,7 +54,7 @@ class TestApply extends java.io.Serializable{ assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) - assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold)) assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) 
assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) From cd9c51fd7a139cb5b410af8f83ce855c6f49aaee Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 9 Dec 2021 13:57:53 +0100 Subject: [PATCH 027/162] vocabulary based cleaning considers also the term label when looking up for a synonym --- .../common/vocabulary/VocabularyGroup.java | 8 ++ .../clean/GraphCleaningFunctionsTest.java | 7 +- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 86 +++++++++++++++++++ 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index d5f57849c..1c129ff9c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -57,9 +57,17 @@ public class VocabularyGroup implements Serializable { final String syn = arr[2].trim(); vocs.addSynonyms(vocId, termId, syn); + } } + // add the term names as synonyms + vocs.vocs.values().forEach(voc -> { + voc.getTerms().values().forEach(term -> { + voc.addSynonym(term.getName().toLowerCase(), term.getId()); + }); + }); + return vocs; } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index c8a368dd6..f822daa0e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -101,6 +101,9 @@ public class GraphCleaningFunctionsTest { assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid()); assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname()); + 
assertEquals("0033", p_out.getInstance().get(1).getInstancetype().getClassid()); + assertEquals("Audiovisual", p_out.getInstance().get(1).getInstancetype().getClassname()); + assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid()); assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname()); @@ -114,7 +117,7 @@ public class GraphCleaningFunctionsTest { List poi = p_out.getInstance(); assertNotNull(poi); - assertEquals(1, poi.size()); + assertEquals(2, poi.size()); final Instance poii = poi.get(0); assertNotNull(poii); @@ -163,7 +166,7 @@ public class GraphCleaningFunctionsTest { final List pci = p_cleaned.getInstance(); assertNotNull(pci); - assertEquals(1, pci.size()); + assertEquals(2, pci.size()); final Instance pcii = pci.get(0); assertNotNull(pcii); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index b3e302474..5b9e86c65 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -403,6 +403,92 @@ "http://juuli.fi/Record/0275158616", "http://dx.doi.org/10.1007/s109090161569x" ] + }, + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1002/s21010127267xy" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, 
+ "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Audiovisual", + "classname": "Audiovisual", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://dx.doi.org/10.1002/s21010127267xy" + ] } ], "journal": { From c1b6ae47cd4af0bd2db0901b8c1c623ef3800d5d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 9 Dec 2021 16:44:28 +0100 Subject: [PATCH 028/162] cleaning workflow assigns the proper default instance type when a value could not be cleaned using the vocabularies --- .../oaf/utils/GraphCleaningFunctions.java | 36 +++++++- .../oa/graph/clean/CleanGraphSparkJob.java | 2 +- .../clean/GraphCleaningFunctionsTest.java | 20 +++-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 2 +- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 90 ++++++++++++++++++- 5 files changed, 138 insertions(+), 12 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java 
index 592580ab8..f49a47c0a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -16,6 +16,8 @@ import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import eu.dnetlib.dhp.common.vocabulary.Vocabulary; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -115,7 +117,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { return true; } - public static T cleanup(T value) { + public static T cleanup(T value, VocabularyGroup vocs) { if (value instanceof Datasource) { // nothing to clean here } else if (value instanceof Project) { @@ -234,6 +236,38 @@ public class GraphCleaningFunctions extends CleaningFunctions { if (Objects.nonNull(r.getInstance())) { for (Instance i : r.getInstance()) { + if (!vocs.termExists(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getClassid())) { + if (r instanceof Publication) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof Dataset) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof Software) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE, + ModelConstants.DNET_PUBLICATION_RESOURCE)); + } else if (r instanceof OtherResearchProduct) { + i + .setInstancetype( + OafMapperUtils + .qualifier( + "0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE, + 
ModelConstants.DNET_PUBLICATION_RESOURCE)); + } + } + if (Objects.nonNull(i.getPid())) { i.setPid(processPidCleaning(i.getPid())); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java index d43d7ce28..2e2ea567a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java @@ -88,7 +88,7 @@ public class CleanGraphSparkJob { readTableFromPath(spark, inputPath, clazz) .map((MapFunction) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz)) .map((MapFunction) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz)) - .map((MapFunction) GraphCleaningFunctions::cleanup, Encoders.bean(clazz)) + .map((MapFunction) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz)) .filter((FilterFunction) GraphCleaningFunctions::filter) .write() .mode(SaveMode.Overwrite) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index f822daa0e..0e9efed21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -101,8 +101,11 @@ public class GraphCleaningFunctionsTest { assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid()); assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname()); - assertEquals("0033", p_out.getInstance().get(1).getInstancetype().getClassid()); - assertEquals("Audiovisual", 
p_out.getInstance().get(1).getInstancetype().getClassname()); + assertEquals("0027", p_out.getInstance().get(1).getInstancetype().getClassid()); + assertEquals("Model", p_out.getInstance().get(1).getInstancetype().getClassname()); + + assertEquals("xyz", p_out.getInstance().get(2).getInstancetype().getClassid()); + assertEquals("xyz", p_out.getInstance().get(2).getInstancetype().getClassname()); assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid()); assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname()); @@ -117,7 +120,7 @@ public class GraphCleaningFunctionsTest { List poi = p_out.getInstance(); assertNotNull(poi); - assertEquals(2, poi.size()); + assertEquals(3, poi.size()); final Instance poii = poi.get(0); assertNotNull(poii); @@ -145,7 +148,7 @@ public class GraphCleaningFunctionsTest { assertEquals(5, p_out.getTitle().size()); - Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out); + Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); assertEquals(3, p_cleaned.getTitle().size()); @@ -164,9 +167,12 @@ public class GraphCleaningFunctionsTest { assertEquals("1970-10-07", p_cleaned.getDateofacceptance().getValue()); + assertEquals("0038", p_cleaned.getInstance().get(2).getInstancetype().getClassid()); + assertEquals("Other literature type", p_cleaned.getInstance().get(2).getInstancetype().getClassname()); + final List pci = p_cleaned.getInstance(); assertNotNull(pci); - assertEquals(2, pci.size()); + assertEquals(3, pci.size()); final Instance pcii = pci.get(0); assertNotNull(pcii); @@ -234,7 +240,7 @@ public class GraphCleaningFunctionsTest { .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json")); Publication p_in = MAPPER.readValue(json, Publication.class); Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); - Publication cleaned = GraphCleaningFunctions.cleanup(p_out); + Publication 
cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned)); } @@ -245,7 +251,7 @@ public class GraphCleaningFunctionsTest { .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json")); Publication p_in = MAPPER.readValue(json, Publication.class); Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping); - Publication cleaned = GraphCleaningFunctions.cleanup(p_out); + Publication cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies); Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 27e33bf27..de79b750a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -708,7 +708,7 @@ class MappersTest { assertEquals(1, p.getTitle().size()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); - final Publication p_cleaned = cleanup(fixVocabularyNames(p)); + final Publication p_cleaned = cleanup(fixVocabularyNames(p), vocs); assertNotNull(p_cleaned.getTitle()); assertFalse(p_cleaned.getTitle().isEmpty()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 5b9e86c65..78fdc4c9d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -481,14 +481,100 @@ "value": "VIRTA" }, "instancetype": { - "classid": "Audiovisual", - "classname": "Audiovisual", + 
"classid": "Model", + "classname": "Model", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource" }, "url": [ "http://dx.doi.org/10.1002/s21010127267xy" ] + }, + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1002/s21010127267xy" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "xyz", + "classname": "xyz", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://dx.doi.org/10.1002/t32121238378t" + ] 
} ], "journal": { From f0b523cfa7eb64c6adca18bee27ce6d0f80d7ecd Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 11 Nov 2021 12:57:19 +0200 Subject: [PATCH 029/162] removed the too restrctive clause. will discuss again --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index d461bd343..b3cbc9b41 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -126,7 +126,7 @@ FROM ${stats_db_name}.otherresearchproduct_topics; CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r -WHERE r.reltype = 'resultOrganization' and r.relclass='hasAuthorInstitution' and r.subreltype='affiliation' +WHERE r.reltype = 'resultOrganization' and r.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.result_projects AS From 9458ee793834c878c1c2d6f2ca12c52f70dc34dc Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 3 Jan 2022 17:25:26 +0100 Subject: [PATCH 030/162] serialise records in the OAF-store-graph mdstores in json format. 
Read them again in the graph construction phase using a tolerant parser to support backward compatible changes in the evolution of the schema --- .../dhp/collection/CollectionUtils.scala | 13 +++++++ .../GenerateDataciteDatasetSpark.scala | 16 ++++----- .../bio/SparkTransformBioDatabaseToOAF.scala | 14 ++++---- .../ebi/SparkCreateBaselineDataFrame.scala | 16 +++++---- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 10 +++--- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 2 +- .../raw/CopyHdfsOafSparkApplication.scala | 36 +++++++++++++------ 7 files changed, 68 insertions(+), 39 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala index 11ecfd6cb..26fcecbbd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.collection +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelSupport import eu.dnetlib.dhp.schema.oaf.{Oaf, OafEntity, Relation} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode} object CollectionUtils { @@ -46,4 +48,15 @@ object CollectionUtils { List() } + def saveDataset(d: Dataset[Oaf], targetPath: String):Unit = { + implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + val mapper = new ObjectMapper + + d + .flatMap(i => CollectionUtils.fixRelations(i)) + .filter(i => i != null) + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write.mode(SaveMode.Overwrite).save(targetPath) + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index e1607ee9c..d11c33fb4 
100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -2,14 +2,14 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.AbstractScalaApplication -import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile import eu.dnetlib.dhp.utils.ISLookupClientFactory -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} @@ -73,12 +73,12 @@ class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord] implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - spark.read.load(sourcePath).as[DataciteType] - .filter(d => d.isActive) - .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) - .filter(d => d != null) - .flatMap(i => fixRelations(i)).filter(i => i != null) - .write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset( + spark.read.load(sourcePath).as[DataciteType] + .filter(d => d.isActive) + .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) + .filter(d => d != null), + targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index 8ae8285e3..27caa8f36 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Oaf -import BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkTransformBioDatabaseToOAF { @@ -36,13 +36,13 @@ object SparkTransformBioDatabaseToOAF { import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))), targetPath) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))), targetPath) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + 
CollectionUtils.saveDataset(spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)), targetPath) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))), targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 17d21f19c..0fea4ff7f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -1,9 +1,10 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.schema.oaf.Result -import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} +import eu.dnetlib.dhp.sx.bio.pubmed._ import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration @@ -177,7 +178,7 @@ object SparkCreateBaselineDataFrame { implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) - implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) if 
(!"true".equalsIgnoreCase(skipUpdate)) { downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) @@ -192,9 +193,10 @@ object SparkCreateBaselineDataFrame { } val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle] - exported_dataset - .map(a => PubMedToOaf.convert(a, vocabularies)).as[Result] - .filter(p => p != null) - .write.mode(SaveMode.Overwrite).save(targetPath) + CollectionUtils.saveDataset(exported_dataset + .map(a => PubMedToOaf.convert(a, vocabularies)).as[Oaf] + .filter(p => p != null), + targetPath) + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 8da617ca0..cd03f004d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -1,11 +1,10 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.collection.CollectionUtils import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.BioDBToOAF import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -35,10 +34,9 @@ object SparkEBILinksToOaf { val ebLinks: Dataset[EBILinkItem] = spark.read.load(sourcePath).as[EBILinkItem].filter(l => l.links != null && l.links.startsWith("{")) - ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) + CollectionUtils.saveDataset(ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) - .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) - .flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null) - 
.write.mode(SaveMode.Overwrite).save(targetPath) + .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)), + targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index ecef32202..d09237842 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -111,7 +111,7 @@ object PubMedToOaf { * @param vocabularies the vocabularies * @return The OAF instance if the mapping did not fail */ - def convert(article: PMArticle, vocabularies: VocabularyGroup): Result = { + def convert(article: PMArticle, vocabularies: VocabularyGroup): Oaf = { if (article.getPublicationTypes == null) return null diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index c7ad1890d..1376c6b35 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -1,18 +1,15 @@ package eu.dnetlib.dhp.oa.graph.raw -import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.ModelSupport -import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils -import org.apache.commons.io.IOUtils -import org.apache.commons.lang3.StringUtils -import org.apache.http.client.methods.HttpGet -import 
org.apache.http.impl.client.HttpClients import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.{SparkConf, SparkContext} +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ @@ -51,18 +48,21 @@ object CopyHdfsOafSparkApplication { log.info("hdfsPath: {}", hdfsPath) implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + import spark.implicits._ val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList if (validPaths.nonEmpty) { - val oaf = spark.read.load(validPaths: _*).as[Oaf] - val mapper = new ObjectMapper() - val l =ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList + val oaf = spark.read.load(validPaths: _*).as[String] + val mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + val l = ModelSupport.oafTypes.entrySet.asScala.toList l.foreach( e => - oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) + oaf + .filter(o => isOafType(o, e.getKey)) + .map(j => mapper.readValue(j, e.getValue).asInstanceOf[Oaf]) .map(s => mapper.writeValueAsString(s))(Encoders.STRING) .write .option("compression", "gzip") @@ -71,4 +71,20 @@ object CopyHdfsOafSparkApplication { ) } } + + def isOafType(input: String, oafType: String): Boolean = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(input) + if (oafType == "relation") { + val hasSource = (json \ "source").extractOrElse[String](null) + val hasTarget = (json \ "target").extractOrElse[String](null) + + hasSource != null && hasTarget != null + } else { + val hasId = (json \ "id").extractOrElse[String](null) + val resultType = (json \ "resulttype" \ "classid").extractOrElse[String](null) + hasId != null 
&& oafType.equalsIgnoreCase(resultType) + } + + } } From 8d13effa311565955a2b625d652f44551baabdab Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 4 Jan 2022 11:26:56 +0100 Subject: [PATCH 031/162] test for the tolerant deserialisation utility method --- .../raw/CopyHdfsOafSparkApplicationTest.java | 65 +++++++++++++++++++ .../dnetlib/dhp/oa/graph/raw/dataset_1.json | 3 + .../dhp/oa/graph/raw/publication_1.json | 1 + .../raw/publication_2_unknownProperty.json | 1 + .../dnetlib/dhp/oa/graph/raw/relation_1.json | 6 ++ 5 files changed, 76 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java new file mode 100644 index 000000000..85cb551bc --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java @@ -0,0 +1,65 @@ + +package eu.dnetlib.dhp.oa.graph.raw; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +public class CopyHdfsOafSparkApplicationTest { + + @Test + void testIsOafType() throws IOException { + assertTrue( + 
CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/publication_1.json")), + "publication")); + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json")), + "dataset")); + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/relation_1.json")), + "relation")); + + assertFalse( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/publication_1.json")), + "dataset")); + assertFalse( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json")), + "publication")); + + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass() + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json")), + "publication")); + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json new file mode 100644 index 000000000..c22dc94e3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/dataset_1.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classificatio
n_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classif
ication_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_t
ypologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - 
geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages 
D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json new file mode 100644 index 000000000..bd3710234 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_1.json @@ -0,0 +1 @@ +{"author":[{"fullname":"Makkonen, 
Lasse","name":"Lasse","pid":[],"rank":1,"surname":"Makkonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Watson, Rick"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"dateofcollection":"2021-10-04T12:42:57.502Z","dateoftransformation":"2021-10-04T15:32:51.877Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433424020,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-10-26T12:07:44Z","harvestDate":"2021-10-04T12:42:57.502Z","identifier":"oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Irish Wind Energy 
Association"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Makkonen , L 1997 , Calculation of ice accretion on wind turbine blades . in R Watson (ed.) , EWEC '97: European Wind Energy Conference : Proceedings of the international conference . Irish Wind Energy Association , Slane , European Wind Energy Conference EWEC '97 , Dublin , Ireland , 6/10/97 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Calculation of ice accretion on wind turbine blades"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json new file mode 100644 index 000000000..364796c24 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json @@ -0,0 +1 @@ +{"id":"50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353", 
"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"}, "unknownProperty": "asdasd"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json new file mode 100644 index 000000000..31755c53d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relation_1.json @@ -0,0 +1,6 @@ +{"source":"1a","target":"10a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"10a","target":"1a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} 
+{"source":"2a","target":"20a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"20a","target":"2a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"15a","target":"25a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} 
+{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file From f82db765db357a76fa2421ca4e430eb6e8ef5d67 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 4 Jan 2022 14:23:01 +0100 Subject: [PATCH 032/162] OAF-store-graph mdstores: save them in text format --- .../java/eu/dnetlib/dhp/collection/CollectionUtils.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala index 26fcecbbd..509e464e5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -56,7 +56,10 @@ object CollectionUtils { .flatMap(i => CollectionUtils.fixRelations(i)) .filter(i => i != null) .map(r => mapper.writeValueAsString(r))(Encoders.STRING) - .write.mode(SaveMode.Overwrite).save(targetPath) + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(targetPath) } } From 3dc48c7ab51fcf843c615614db9e5ea9eec6cb4b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 4 Jan 2022 14:40:16 +0100 Subject: [PATCH 033/162] OAF-store-graph mdstores: save them in text format --- .../java/eu/dnetlib/dhp/collection/CollectionUtils.scala | 
8 +++----- .../dhp/datacite/GenerateDataciteDatasetSpark.scala | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala index 509e464e5..e7de60e07 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -48,18 +48,16 @@ object CollectionUtils { List() } - def saveDataset(d: Dataset[Oaf], targetPath: String):Unit = { + def saveDataset(dataset: Dataset[Oaf], targetPath: String): Unit = { implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - val mapper = new ObjectMapper - d + dataset .flatMap(i => CollectionUtils.fixRelations(i)) .filter(i => i != null) - .map(r => mapper.writeValueAsString(r))(Encoders.STRING) .write .mode(SaveMode.Overwrite) .option("compression", "gzip") - .text(targetPath) + .json(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index d11c33fb4..a205edcf2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -38,7 +38,7 @@ class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) val outputBasePath = cleanedMdStoreVersion.getHdfsPath log.info(s"outputBasePath is '$outputBasePath'") - val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH" + val targetPath = s"$outputBasePath$MDSTORE_DATA_PATH" log.info(s"targetPath is 
'$targetPath'") generateDataciteDataset(sourcePath, exportLinks, vocabularies, targetPath, spark) @@ -54,7 +54,7 @@ class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log * @param outputBasePath */ def reportTotalSize( targetPath: String, outputBasePath: String ):Unit = { - val total_items = spark.read.load(targetPath).count() + val total_items = spark.read.text(targetPath).count() writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) } From 3bd3653be9e884679c390f8b9f9f36800b657692 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 4 Jan 2022 15:02:09 +0100 Subject: [PATCH 034/162] OAF-store-graph mdstores: save them in text format --- .../main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala index e7de60e07..86a28ac10 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -50,14 +50,16 @@ object CollectionUtils { def saveDataset(dataset: Dataset[Oaf], targetPath: String): Unit = { implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + val mapper = new ObjectMapper dataset .flatMap(i => CollectionUtils.fixRelations(i)) .filter(i => i != null) + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) .write .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(targetPath) + .text(targetPath) } } From 8ae46ca7896d88561417a34394dc502e6e9be6b6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 5 Jan 2022 15:49:05 +0100 Subject: [PATCH 035/162] OAF-store-graph mdstores: firther fix for PR#180 --- .../raw/CopyHdfsOafSparkApplication.scala | 27 
++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index 1376c6b35..fa13f477c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -48,26 +48,27 @@ object CopyHdfsOafSparkApplication { log.info("hdfsPath: {}", hdfsPath) implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - import spark.implicits._ val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList + val types = ModelSupport.oafTypes.entrySet + .asScala + .map(e => Tuple2(e.getKey, e.getValue)) + if (validPaths.nonEmpty) { - val oaf = spark.read.load(validPaths: _*).as[String] + val oaf = spark.read.textFile(validPaths: _*) val mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - val l = ModelSupport.oafTypes.entrySet.asScala.toList - l.foreach( - e => - oaf - .filter(o => isOafType(o, e.getKey)) - .map(j => mapper.readValue(j, e.getValue).asInstanceOf[Oaf]) - .map(s => mapper.writeValueAsString(s))(Encoders.STRING) - .write - .option("compression", "gzip") - .mode(SaveMode.Append) - .text(s"$hdfsPath/${e}") + + types.foreach(t => oaf + .filter(o => isOafType(o, t._1)) + .map(j => mapper.readValue(j, t._2).asInstanceOf[Oaf]) + .map(s => mapper.writeValueAsString(s))(Encoders.STRING) + .write + .option("compression", "gzip") + .mode(SaveMode.Append) + .text(s"$hdfsPath/${t._1}") ) } } From bb17e070d8de9fadfdff214a5869e360e0a3299c Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 
23 Dec 2021 15:46:17 +0200 Subject: [PATCH 036/162] added result_result relations --- .../oa/graph/stats/oozie_app/scripts/step13.sql | 15 ++++++++++++++- .../oozie_app/scripts/step20-createMonitorDB.sql | 3 +++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index e4e81175c..315d296fc 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -67,4 +67,17 @@ from ( LATERAL VIEW explode(author) a as auth LATERAL VIEW explode(auth.pid) ap as auth_pid LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type - WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res \ No newline at end of file + WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; + +create table ${stats_db_name}.result_result stored as parquet as +select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype +from ${openaire_db_name}.relation rel +join ${openaire_db_name}.result r1 on rel.source=r1.id +join ${openaire_db_name}.result r2 on r2.id=rel.target +where reltype='resultResult' + and r1.resulttype.classname!=r2.resulttype.classname + and r1.datainfo.deletedbyinference=false + and r2.datainfo.deletedbyinference=false + and r1.resulttype.classname != 'other' + and r2.resulttype.classname != 'other' + and rel.datainfo.deletedbyinference=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index fa8e4c6a7..ce7b4bd9c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -79,6 +79,9 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; +create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or r.id=orig.target); +compute stats TARGET.result_result; + -- datasources create view if not exists TARGET.datasource as select * from SOURCE.datasource; create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids; From a4316bafedbc312de48096caa2e9e77b912b8224 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 23 Dec 2021 15:33:53 +0200 Subject: [PATCH 037/162] fixed a typo --- .../scripts/step20-createMonitorDB.sql | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index ce7b4bd9c..2d6ff41d4 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -49,8 +49,10 @@ compute stats TARGET.result_greenoa; create table 
TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_languages; -create table TARGET.result_licences stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.result_licences; +create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_licenses; + +create table TARGET.licenses_normalized as select * from SOURCE.licenses_normalized; create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_oids; @@ -79,9 +81,6 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or r.id=orig.target); -compute stats TARGET.result_result; - -- datasources create view if not exists TARGET.datasource as select * from SOURCE.datasource; create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids; @@ -103,6 +102,7 @@ create view if not exists TARGET.project as select * from SOURCE.project; create view if not exists TARGET.project_oids as select * from SOURCE.project_oids; create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations; create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount; +create view if not exists TARGET.project_classification as select * from 
SOURCE.project_classification; create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; compute stats TARGET.project_results; @@ -118,10 +118,10 @@ create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.i compute stats TARGET.indi_pub_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; -create table TARGET.indi_pub_has_cc_licence stored as parquet as select * from SOURCE.indi_pub_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_has_cc_licence; -create table TARGET.indi_pub_has_cc_licence_url stored as parquet as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_has_cc_licence_url; +create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence; +create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence_url; create view TARGET.indi_funder_country_collab stored as select * from SOURCE.indi_funder_country_collab; @@ -138,6 +138,12 @@ compute stats TARGET.indi_pub_in_transformative; create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_closed_other_open; +create table TARGET.indi_result_no_of_copies stored as parquet as select * from 
SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_no_of_copies; + +--- Usage statistics +create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); + --denorm alter table TARGET.result rename to TARGET.res_tmp; From 4878d7485c1e1f79a3bdbb83eaed364c5ce4f576 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Wed, 22 Dec 2021 02:54:42 +0200 Subject: [PATCH 038/162] added usage stats --- .../oa/graph/stats/oozie_app/scripts/step10.sql | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql index fc0162a9c..13a4803a9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -27,6 +27,22 @@ CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS SELECT * FROM ${external_stats_db_name}.licenses_normalized; +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +-- Usage statistics +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +create or replace view ${stats_db_name}.usage_stats as +select * from openaire_prod_usage_stats.usage_stats; + +create or replace view ${stats_db_name}.downloads_stats as +select * from openaire_prod_usage_stats.downloads_stats; + +create or replace view 
${stats_db_name}.pageviews_stats as +select * from openaire_prod_usage_stats.pageviews_stats; + +create or replace view ${stats_db_name}.views_stats as +select * from openaire_prod_usage_stats.views_stats; ------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------ From 3ce1976627f5ea79d3b34dadcd5707aaa8da79c3 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Tue, 21 Dec 2021 22:55:04 +0200 Subject: [PATCH 039/162] fixed column names --- .../stats/oozie_app/scripts/step16-createIndicatorsTables.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 926c8825f..3b1a7eeaf 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -304,7 +304,7 @@ with tmp as (select funder, project, country from organization_projects op join organization o on o.id=op.id join project p on p.id=op.project where country <> 'UNKNOWN') -select f1.funder, f1.country, f2.country, count(distinct f1.project) as collaborations +select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations from tmp as f1 join tmp as f2 on f1.project=f2.project where f1.country<>f2.country From e91f06f39bf38016c1d8565d53ff5e853afcc76c Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Tue, 21 Dec 2021 15:54:38 +0200 Subject: [PATCH 040/162] fixed typos in indicators. 
Added extra views in monitor --- .../scripts/step16-createIndicatorsTables.sql | 336 ++++-------------- .../scripts/step20-createMonitorDB.sql | 6 +- 2 files changed, 79 insertions(+), 263 deletions(-) mode change 100644 => 100755 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql old mode 100644 new mode 100755 index 3b1a7eeaf..bdd681f8d --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1,28 +1,29 @@ +---- Sprint 1 ---- create table indi_pub_green_oa stored as parquet as -select distinct p.id, coalesce(green_oa, 0) as green_oa +select distinct p.id, coalesce(green_oa, 0) as green_oa from publication p -left outer join ( -select p.id, 1 as green_oa +left outer join ( +select p.id, 1 as green_oa from publication p join result_instance ri on ri.id = p.id join datasource on datasource.id = ri.hostedby where datasource.type like '%Repository%' -and (ri.accessright = 'Open Access' -or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp +and (ri.accessright = 'Open Access' +or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; create table indi_pub_grey_lit stored as parquet as select distinct p.id, coalesce(grey_lit, 0) as grey_lit from publication p left outer join ( -select p.id, 1 as grey_lit +select p.id, 1 as grey_lit from publication p join result_classifications rt on rt.id = p.id -where rt.type not in ('Article','Part of book or chapter 
of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and +where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; create table indi_pub_doi_from_crossref stored as parquet as -select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref +select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref from publication p left outer join (select ri.id, 1 as doi_from_crossref from result_instance ri @@ -33,7 +34,7 @@ on tmp.id=p.id; create table indi_pub_gold_oa stored as parquet as select distinct p.id, coalesce(gold_oa, 0) as gold_oa from publication p -left outer join ( +left outer join ( select p.id, 1 as gold_oa from publication p join result_instance ri on ri.id = p.id @@ -41,255 +42,56 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; ---create table indi_project_pubs_count stored as parquet as ---select pr.id id, count(p.id) total_pubs from project_results pr ---join publication p on p.id=pr.result ---group by pr.id; - ---create table indi_project_datasets_count stored as parquet as ---select pr.id id, count(d.id) total_datasets from project_results pr ---join dataset d on d.id=pr.result ---group by pr.id; - ---create table indi_project_software_count stored as parquet as ---select pr.id id, count(s.id) total_software from project_results pr ---join software s on s.id=pr.result ---group by pr.id; - ---create table indi_project_otherresearch_count stored as parquet as ---select pr.id id, count(o.id) total_other from project_results pr ---join otherresearchproduct o on o.id=pr.result ---group by pr.id; - ---create table indi_pub_avg_year_country_oa stored as parquet as ---select 
year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - --from - --(SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 - --ELSE 0 - --END) AS OpenAccess, SUM(CASE - --WHEN bestlicence<>'Open Access' THEN 1 - --ELSE 0 - --END) AS NonOpenAccess - --FROM publication p - --join result_organization ro on p.id=ro.id - --join organization o on o.id=ro.organization - --where cast(year as int)>=2003 and cast(year as int)<=2021 - --group by year, country) tmp; - ---create table indi_dataset_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA ---from - --(SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 - --ELSE 0 - --END) AS OpenAccess, SUM(CASE - --WHEN bestlicence<>'Open Access' THEN 1 - --ELSE 0 - --END) AS NonOpenAccess - --FROM dataset d - --join result_organization ro on d.id=ro.id - --join organization o on o.id=ro.organization - --where cast(year as int)>=2003 and cast(year as int)<=2021 - --group by year, country) tmp; - ---create table indi_software_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA ---from --- (SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 --- ELSE 0 ---END) AS OpenAccess, SUM(CASE --- WHEN bestlicence<>'Open Access' THEN 1 --- ELSE 0 --- END) AS NonOpenAccess --- FROM software s --- join result_organization ro on s.id=ro.id --- join organization o on o.id=ro.organization --- where cast(year as int)>=2003 and cast(year as int)<=2021 --- group by year, country) tmp; - - ---create table indi_other_avg_year_country_oa stored as parquet as ---select year, country, 
round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA --- from --- (SELECT year, country, SUM(CASE --- WHEN bestlicence='Open Access' THEN 1 --- ELSE 0 --- END) AS OpenAccess, SUM(CASE --- WHEN bestlicence<>'Open Access' THEN 1 --- ELSE 0 --- END) AS NonOpenAccess --- FROM otherresearchproduct orp --- join result_organization ro on orp.id=ro.id --- join organization o on o.id=ro.organization --- where cast(year as int)>=2003 and cast(year as int)<=2021 --- group by year, country) tmp; - ---create table indi_pub_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join publication p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofpubs ---from total; - ---create table indi_dataset_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join dataset p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofdataset ---from total; - ---create table indi_software_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join software p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) 
---select year, name, round(no_of_pubs/total*100,3) averageofsoftware ---from total; - ---create table indi_other_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join otherresearchproduct p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofother ---from total; - ---create table indi_other_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from otherresearchproduct_datasources pd ---join datasource d on datasource=d.id ---join otherresearchproduct p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct ---from total; - ---create table indi_software_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from software_datasources pd ---join datasource d on datasource=d.id ---join software p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfSoftware ---from total; - ---create table indi_dataset_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from dataset_datasources pd ---join datasource d on datasource=d.id ---join dataset p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 
---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfDatasets ---from total; - ---create table indi_pub_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from publication_datasources pd ---join datasource d on datasource=d.id ---join publication p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfPubs ---from total; - -create table indi_pub_has_cc_licence stored as parquet as -select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license -from publication p -left outer join (select p.id, license.type as lic from publication p -join publication_licenses as license on license.id = p.id +---- Sprint 2 ---- +create table indi_result_has_cc_licence stored as parquet as +select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license +from result r +left outer join (select r.id, license.type as lic from result r +join result_licenses as license on license.id = r.id where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp -on p.id= tmp.id; +on r.id= tmp.id; -create table indi_pub_has_cc_licence_url stored as parquet as -select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url -from publication p -left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host -from publication p -join publication_licenses as license on license.id = p.id -WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp -on p.id= tmp.id; - --- EOSC-TR1.1-02M: --- ## Indicator: has_cc_license. Creative Commons licensing has become a --- de facto standard in scholarly communication and is promoted by many initiatives --- like Plan S. 
This indicator might be only useful when applied --- to openly available publications. ---create table indi_pub_has_cc_licence_tr stored as parquet as ---select distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_tr ---from publication p ---left outer join (select p.id, license.type as lic from publication p ---join publication_licenses as license on license.id = p.id ---where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp ---on p.id= tmp.id - --- #EOSC-F2-01M_cc Rich metadata for scholarly publications --- ## Indicator: has_cc_license. Creative Commons licensing has become a --- de facto standard in scholarly communication and is promoted by many initiatives --- like Plan S. This indicator might be only useful when applied --- to openly available publications. - --- Same indicator as EOSC-TR1.1-02M (Najko's instructions) --- create table indi_pub_has_cc_licence_f stored as parquet as --- select --- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f --- from publication p --- left outer join (selectp.id,license.type as lic from publication p --- join publication_licenses as license on license.id = p.id --- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp --- on p.id= tmp.id +create table indi_result_has_cc_licence_url stored as parquet as +select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url +from result r +left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host +from result r +join result_licenses as license on license.id = r.id +WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp +on r.id= tmp.id; create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; -create table indi_result_with_orcid stored as parquet as +create table 
indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid -from result r -left outer join (select id, 1 as has_orcid from result_orcid) tmp -on r.id= tmp.id +from result r +left outer join (select id, 1 as has_orcid from result_orcid) tmp +on r.id= tmp.id; -create table indi_funded_result_with_fundref stored as parquet as + +---- Sprint 3 ---- +create table indi_funded_result_with_fundref stored as parquet as select distinct r.id, coalesce(fundref, 0) as fundref -from project_results r +from project_results r left outer join (select distinct id, 1 as fundref from project_results -where provenance='Harvested') tmp -on r.id= tmp.id +where provenance='Harvested') tmp +on r.id= tmp.id; -create table indi_result_org_country_collab stored as parquet as -with tmp as +create table indi_result_org_country_collab stored as parquet as +with tmp as (select o.id as id, o.country , ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization join result r on r.id=ro.id where o.country <> 'UNKNOWN') select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations from tmp as o1 join tmp as o2 on o1.result=o2.result -where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +where o1.id<>o2.id and o1.country<>o2.country +group by o1.id, o1.type,o2.country; -create table indi_result_org_collab stored as parquet as -with tmp as +create table indi_result_org_collab stored as parquet as +with tmp as (select o.id, ro.id as result,r.type from organization o join result_organization ro on o.id=ro.organization join result r on r.id=ro.id) @@ -297,66 +99,82 @@ select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaboratio from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.id<>o2.id -group by o1.id, o2.id, o1.type +group by o1.id, o2.id, o1.type; -create table indi_funder_country_collab stored as parquet as -with tmp as (select funder, 
project, country from organization_projects op -join organization o on o.id=op.id -join project p on p.id=op.project +create table indi_funder_country_collab stored as parquet as +with tmp as (select funder, project, country from organization_projects op +join organization o on o.id=op.id +join project p on p.id=op.project where country <> 'UNKNOWN') -select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations +select f1.funder, f1.country, f2.country, count(distinct f1.project) as collaborations from tmp as f1 join tmp as f2 on f1.project=f2.project -where f1.country<>f2.country -group by f1.funder, f2.country, f1.country +where f1.country<>f2.country +group by f1.funder, f2.country, f1.country; +create table indi_result_country_collab stored as parquet as +with tmp as +(select country, ro.id as result,r.type from organization o +join result_organization ro on o.id=ro.organization +join result r on r.id=ro.id) +select o1.country country1, o2.country country2, o1.type, count(distinct o1.result) as collaborations +from tmp as o1 +join tmp as o2 on o1.result=o2.result +where o1.country<>o2.country +group by o1.country, o2.country, o1.type; + +---- Sprint 4 ---- create table indi_pub_diamond stored as parquet as -select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal +select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal from publication_datasources pd left outer join ( select pd.id, 1 as in_diamond_journal from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_pub_hybrid stored as parquet as -select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid +select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid from 
publication_datasources pd left outer join ( select pd.id, 1 as is_hybrid from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_is_gold_oa stored as parquet as -(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa +(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa from publication_datasources pd left outer join ( select pd.id, 1 as gold_oa from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp -on pd.id=tmp.id) - +on pd.id=tmp.id); create table indi_pub_in_transformative stored as parquet as -select distinct pd.id, coalesce(is_transformative, 0) as is_transformative +select distinct pd.id, coalesce(is_transformative, 0) as is_transformative from publication pd left outer join ( select pd.id, 1 as is_transformative from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and ps.is_transformative_journal=true) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri -left outer join +left outer join (select ri.id, 1 as pub_closed_other_open from result_instance ri join publication p on p.id=ri.id join datasource d on ri.hostedby=d.id where d.type like '%Journal%' and ri.accessright='Closed Access' and -(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp -on tmp.id=ri.id \ No newline at end of file +(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp +on 
tmp.id=ri.id; + + +---- Sprint 5 ---- +create table indi_result_no_of_copies stored as parquet as +select id, count(id) as number_of_copies from result_instance group by id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 2d6ff41d4..9bd90aa1f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -52,7 +52,8 @@ compute stats TARGET.result_languages; create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_licenses; -create table TARGET.licenses_normalized as select * from SOURCE.licenses_normalized; +create table TARGET.licenses_normalized stored as parquet as select * from SOURCE.licenses_normalized orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.licenses_normalized; create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_oids; @@ -141,9 +142,6 @@ compute stats TARGET.indi_pub_closed_other_open; create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_no_of_copies; ---- Usage statistics -create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); - 
--denorm alter table TARGET.result rename to TARGET.res_tmp; From b6b4bc0df975508763e86d7cc868d5d81f6db176 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Tue, 21 Dec 2021 03:35:04 +0200 Subject: [PATCH 041/162] added first indicator of sprint 5 --- .../oozie_app/scripts/step20-createMonitorDB.sql | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 9bd90aa1f..1ae459640 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -49,11 +49,8 @@ compute stats TARGET.result_greenoa; create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_languages; -create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.result_licenses; - -create table TARGET.licenses_normalized stored as parquet as select * from SOURCE.licenses_normalized orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.licenses_normalized; +create table TARGET.result_licences stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_licences; create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats 
TARGET.result_oids; @@ -103,7 +100,6 @@ create view if not exists TARGET.project as select * from SOURCE.project; create view if not exists TARGET.project_oids as select * from SOURCE.project_oids; create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations; create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount; -create view if not exists TARGET.project_classification as select * from SOURCE.project_classification; create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; compute stats TARGET.project_results; @@ -119,8 +115,8 @@ create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.i compute stats TARGET.indi_pub_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; -create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_result_has_cc_licence; +create table TARGET.indi_result_has_cc_licence_f stored as parquet as select * from SOURCE.indi_result_has_cc_licence_f orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence_f; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence_url; From b053b0178e0624c3416cb38eaa82014b99e64be2 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Mon, 20 Dec 2021 19:23:57 +0200 Subject: [PATCH 042/162] Sprint 5 and other changes --- .../scripts/step16-createIndicatorsTables.sql | 234 
++++++++++++++++-- 1 file changed, 219 insertions(+), 15 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index bdd681f8d..9c67da883 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -42,8 +42,181 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; +--create table indi_project_pubs_count stored as parquet as +--select pr.id id, count(p.id) total_pubs from project_results pr +--join publication p on p.id=pr.result +--group by pr.id; + +--create table indi_project_datasets_count stored as parquet as +--select pr.id id, count(d.id) total_datasets from project_results pr +--join dataset d on d.id=pr.result +--group by pr.id; + +--create table indi_project_software_count stored as parquet as +--select pr.id id, count(s.id) total_software from project_results pr +--join software s on s.id=pr.result +--group by pr.id; + +--create table indi_project_otherresearch_count stored as parquet as +--select pr.id id, count(o.id) total_other from project_results pr +--join otherresearchproduct o on o.id=pr.result +--group by pr.id; + +--create table indi_pub_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + --from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS 
NonOpenAccess + --FROM publication p + --join result_organization ro on p.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; + +--create table indi_dataset_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from + --(SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 + --ELSE 0 + --END) AS OpenAccess, SUM(CASE + --WHEN bestlicence<>'Open Access' THEN 1 + --ELSE 0 + --END) AS NonOpenAccess + --FROM dataset d + --join result_organization ro on d.id=ro.id + --join organization o on o.id=ro.organization + --where cast(year as int)>=2003 and cast(year as int)<=2021 + --group by year, country) tmp; + +--create table indi_software_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +--from +-- (SELECT year, country, SUM(CASE + --WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +--END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM software s +-- join result_organization ro on s.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; + + +--create table indi_other_avg_year_country_oa stored as parquet as +--select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +--round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA +-- from +-- (SELECT year, country, SUM(CASE +-- WHEN bestlicence='Open Access' THEN 1 +-- ELSE 0 +-- END) AS OpenAccess, SUM(CASE +-- WHEN bestlicence<>'Open Access' THEN 1 +-- ELSE 0 +-- END) AS NonOpenAccess +-- FROM 
otherresearchproduct orp +-- join result_organization ro on orp.id=ro.id +-- join organization o on o.id=ro.organization +-- where cast(year as int)>=2003 and cast(year as int)<=2021 +-- group by year, country) tmp; + +--create table indi_pub_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join publication p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofpubs +--from total; + +--create table indi_dataset_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join dataset p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofdataset +--from total; + +--create table indi_software_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc +--join context c on pc.concept like concat('%',c.id,'%') +--join software p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofsoftware +--from total; + +--create table indi_other_avg_year_context_oa stored as parquet as +--with total as +--(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc +--join context c on 
pc.concept like concat('%',c.id,'%') +--join otherresearchproduct p on p.id=pc.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by c.name, year ) +--select year, name, round(no_of_pubs/total*100,3) averageofother +--from total; + +--create table indi_other_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from otherresearchproduct_datasources pd +--join datasource d on datasource=d.id +--join otherresearchproduct p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct +--from total; + +--create table indi_software_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from software_datasources pd +--join datasource d on datasource=d.id +--join software p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfSoftware +--from total; + +--create table indi_dataset_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from dataset_datasources pd +--join datasource d on datasource=d.id +--join dataset p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfDatasets +--from total; + +--create table indi_pub_avg_year_content_oa stored as parquet as +--with total as +--(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +--from publication_datasources pd 
+--join datasource d on datasource=d.id +--join publication p on p.id=pd.id +--where cast(year as int)>=2003 and cast(year as int)<=2021 +--group by d.type, year) +--select year, type, round(no_of_pubs/total*100,3) averageOfPubs +--from total; + ---- Sprint 2 ---- -create table indi_result_has_cc_licence stored as parquet as +create table indi_result_has_cc_licence_f stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license from result r left outer join (select r.id, license.type as lic from result r @@ -60,6 +233,35 @@ join result_licenses as license on license.id = r.id WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; +-- EOSC-TR1.1-02M: +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. +--create table indi_pub_has_cc_licence_tr stored as parquet as +--select distinct p.id, case when lic='' or lic is null then 0 else 1 end indi_result_org_collabas has_cc_license_tr +--from publication p +--left outer join (select p.id, license.type as lic from publication p +--join publication_licenses as license on license.id = p.id +--where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +--on p.id= tmp.id + +-- #EOSC-F2-01M_cc Rich metadata for scholarly publications +-- ## Indicator: has_cc_license. Creative Commons licensing has become a +-- de facto standard in scholarly communication and is promoted by many initiatives +-- like Plan S. This indicator might be only useful when applied +-- to openly available publications. 
+ +-- Same indicator as EOSC-TR1.1-02M (Najko's instructions) +-- create table indi_pub_has_cc_licence_f stored as parquet as +-- select +-- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f +-- from publication p +-- left outer join (selectp.id,license.type as lic from publication p +-- join publication_licenses as license on license.id = p.id +-- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +-- on p.id= tmp.id + create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; @@ -68,16 +270,16 @@ create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from result r left outer join (select id, 1 as has_orcid from result_orcid) tmp -on r.id= tmp.id; - +on r.id= tmp.id ---- Sprint 3 ---- + create table indi_funded_result_with_fundref stored as parquet as select distinct r.id, coalesce(fundref, 0) as fundref from project_results r left outer join (select distinct id, 1 as fundref from project_results where provenance='Harvested') tmp -on r.id= tmp.id; +on r.id= tmp.id create table indi_result_org_country_collab stored as parquet as with tmp as @@ -88,7 +290,7 @@ select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as col from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country; +group by o1.id, o1.type,o2.country create table indi_result_org_collab stored as parquet as with tmp as @@ -99,7 +301,7 @@ select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaboratio from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.id<>o2.id -group by o1.id, o2.id, o1.type; +group by o1.id, o2.id, o1.type create table indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from organization_projects op @@ 
-110,7 +312,7 @@ select f1.funder, f1.country, f2.country, count(distinct f1.project) as collabor from tmp as f1 join tmp as f2 on f1.project=f2.project where f1.country<>f2.country -group by f1.funder, f2.country, f1.country; +group by f1.funder, f2.country, f1.country create table indi_result_country_collab stored as parquet as with tmp as @@ -121,9 +323,10 @@ select o1.country country1, o2.country country2, o1.type, count(distinct o1.resu from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.country<>o2.country -group by o1.country, o2.country, o1.type; +group by o1.country, o2.country, o1.type ---- Sprint 4 ---- + create table indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal from publication_datasources pd @@ -132,7 +335,7 @@ select pd.id, 1 as in_diamond_journal from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp -on pd.id=tmp.id; +on pd.id=tmp.id create table indi_pub_hybrid stored as parquet as select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid @@ -142,7 +345,7 @@ select pd.id, 1 as is_hybrid from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp -on pd.id=tmp.id; +on pd.id=tmp.id create table indi_is_gold_oa stored as parquet as (select distinct pd.id, coalesce(gold_oa, 0) as gold_oa @@ -152,7 +355,8 @@ select pd.id, 1 as gold_oa from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp -on pd.id=tmp.id); +on pd.id=tmp.id) + 
create table indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative @@ -162,7 +366,7 @@ select pd.id, 1 as is_transformative from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and ps.is_transformative_journal=true) tmp -on pd.id=tmp.id; +on pd.id=tmp.id create table indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri @@ -172,9 +376,9 @@ join publication p on p.id=ri.id join datasource d on ri.hostedby=d.id where d.type like '%Journal%' and ri.accessright='Closed Access' and (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp -on tmp.id=ri.id; - +on tmp.id=ri.id ---- Sprint 5 ---- + create table indi_result_no_of_copies stored as parquet as -select id, count(id) as number_of_copies from result_instance group by id; \ No newline at end of file +select id, count(id) as number_of_copies from result_instance group by id \ No newline at end of file From 9acc32faa69ae1d6dae8bc2f9a0e941d130a5fa3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 12 Jan 2022 12:04:31 +0100 Subject: [PATCH 043/162] [stats wf] final touches for the integration of PRs #166, #179 in the master branch --- .../scripts/step16-createIndicatorsTables.sql | 236 ++---------------- .../scripts/step20-createMonitorDB.sql | 17 +- 2 files changed, 29 insertions(+), 224 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 9c67da883..9f11fa49d 100755 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -42,181 +42,8 @@ join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp on p.id= tmp.id; ---create table indi_project_pubs_count stored as parquet as ---select pr.id id, count(p.id) total_pubs from project_results pr ---join publication p on p.id=pr.result ---group by pr.id; - ---create table indi_project_datasets_count stored as parquet as ---select pr.id id, count(d.id) total_datasets from project_results pr ---join dataset d on d.id=pr.result ---group by pr.id; - ---create table indi_project_software_count stored as parquet as ---select pr.id id, count(s.id) total_software from project_results pr ---join software s on s.id=pr.result ---group by pr.id; - ---create table indi_project_otherresearch_count stored as parquet as ---select pr.id id, count(o.id) total_other from project_results pr ---join otherresearchproduct o on o.id=pr.result ---group by pr.id; - ---create table indi_pub_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA - --from - --(SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 - --ELSE 0 - --END) AS OpenAccess, SUM(CASE - --WHEN bestlicence<>'Open Access' THEN 1 - --ELSE 0 - --END) AS NonOpenAccess - --FROM publication p - --join result_organization ro on p.id=ro.id - --join organization o on o.id=ro.organization - --where cast(year as int)>=2003 and cast(year as int)<=2021 - --group by year, country) tmp; - ---create table indi_dataset_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as 
averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA ---from - --(SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 - --ELSE 0 - --END) AS OpenAccess, SUM(CASE - --WHEN bestlicence<>'Open Access' THEN 1 - --ELSE 0 - --END) AS NonOpenAccess - --FROM dataset d - --join result_organization ro on d.id=ro.id - --join organization o on o.id=ro.organization - --where cast(year as int)>=2003 and cast(year as int)<=2021 - --group by year, country) tmp; - ---create table indi_software_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA ---from --- (SELECT year, country, SUM(CASE - --WHEN bestlicence='Open Access' THEN 1 --- ELSE 0 ---END) AS OpenAccess, SUM(CASE --- WHEN bestlicence<>'Open Access' THEN 1 --- ELSE 0 --- END) AS NonOpenAccess --- FROM software s --- join result_organization ro on s.id=ro.id --- join organization o on o.id=ro.organization --- where cast(year as int)>=2003 and cast(year as int)<=2021 --- group by year, country) tmp; - - ---create table indi_other_avg_year_country_oa stored as parquet as ---select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, ---round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA --- from --- (SELECT year, country, SUM(CASE --- WHEN bestlicence='Open Access' THEN 1 --- ELSE 0 --- END) AS OpenAccess, SUM(CASE --- WHEN bestlicence<>'Open Access' THEN 1 --- ELSE 0 --- END) AS NonOpenAccess --- FROM otherresearchproduct orp --- join result_organization ro on orp.id=ro.id --- join organization o on o.id=ro.organization --- where cast(year as int)>=2003 and cast(year as int)<=2021 --- group by year, country) tmp; - ---create table indi_pub_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct 
pc.id)) over(PARTITION by year) as total from publication_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join publication p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofpubs ---from total; - ---create table indi_dataset_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join dataset p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofdataset ---from total; - ---create table indi_software_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join software p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofsoftware ---from total; - ---create table indi_other_avg_year_context_oa stored as parquet as ---with total as ---(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc ---join context c on pc.concept like concat('%',c.id,'%') ---join otherresearchproduct p on p.id=pc.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by c.name, year ) ---select year, name, round(no_of_pubs/total*100,3) averageofother ---from total; - ---create table indi_other_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, 
d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from otherresearchproduct_datasources pd ---join datasource d on datasource=d.id ---join otherresearchproduct p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct ---from total; - ---create table indi_software_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from software_datasources pd ---join datasource d on datasource=d.id ---join software p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfSoftware ---from total; - ---create table indi_dataset_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from dataset_datasources pd ---join datasource d on datasource=d.id ---join dataset p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfDatasets ---from total; - ---create table indi_pub_avg_year_content_oa stored as parquet as ---with total as ---(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total ---from publication_datasources pd ---join datasource d on datasource=d.id ---join publication p on p.id=pd.id ---where cast(year as int)>=2003 and cast(year as int)<=2021 ---group by d.type, year) ---select year, type, round(no_of_pubs/total*100,3) averageOfPubs ---from total; - ---- Sprint 2 ---- -create table indi_result_has_cc_licence_f stored as parquet as +create table indi_result_has_cc_licence stored as parquet as 
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license from result r left outer join (select r.id, license.type as lic from result r @@ -233,35 +60,6 @@ join result_licenses as license on license.id = r.id WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; --- EOSC-TR1.1-02M: --- ## Indicator: has_cc_license. Creative Commons licensing has become a --- de facto standard in scholarly communication and is promoted by many initiatives --- like Plan S. This indicator might be only useful when applied --- to openly available publications. ---create table indi_pub_has_cc_licence_tr stored as parquet as ---select distinct p.id, case when lic='' or lic is null then 0 else 1 end indi_result_org_collabas has_cc_license_tr ---from publication p ---left outer join (select p.id, license.type as lic from publication p ---join publication_licenses as license on license.id = p.id ---where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp ---on p.id= tmp.id - --- #EOSC-F2-01M_cc Rich metadata for scholarly publications --- ## Indicator: has_cc_license. Creative Commons licensing has become a --- de facto standard in scholarly communication and is promoted by many initiatives --- like Plan S. This indicator might be only useful when applied --- to openly available publications. 
- --- Same indicator as EOSC-TR1.1-02M (Najko's instructions) --- create table indi_pub_has_cc_licence_f stored as parquet as --- select --- distinct p.id, case when lic='' or lic is null then 0 else 1 end as has_cc_license_f --- from publication p --- left outer join (selectp.id,license.type as lic from publication p --- join publication_licenses as license on license.id = p.id --- where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp --- on p.id= tmp.id - create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; @@ -270,16 +68,16 @@ create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from result r left outer join (select id, 1 as has_orcid from result_orcid) tmp -on r.id= tmp.id +on r.id= tmp.id; + ---- Sprint 3 ---- - create table indi_funded_result_with_fundref stored as parquet as select distinct r.id, coalesce(fundref, 0) as fundref from project_results r left outer join (select distinct id, 1 as fundref from project_results where provenance='Harvested') tmp -on r.id= tmp.id +on r.id= tmp.id; create table indi_result_org_country_collab stored as parquet as with tmp as @@ -290,7 +88,7 @@ select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as col from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.id<>o2.id and o1.country<>o2.country -group by o1.id, o1.type,o2.country +group by o1.id, o1.type,o2.country; create table indi_result_org_collab stored as parquet as with tmp as @@ -301,18 +99,18 @@ select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaboratio from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.id<>o2.id -group by o1.id, o2.id, o1.type +group by o1.id, o2.id, o1.type; create table indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from organization_projects op join 
organization o on o.id=op.id join project p on p.id=op.project where country <> 'UNKNOWN') -select f1.funder, f1.country, f2.country, count(distinct f1.project) as collaborations +select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations from tmp as f1 join tmp as f2 on f1.project=f2.project where f1.country<>f2.country -group by f1.funder, f2.country, f1.country +group by f1.funder, f2.country, f1.country; create table indi_result_country_collab stored as parquet as with tmp as @@ -323,10 +121,9 @@ select o1.country country1, o2.country country2, o1.type, count(distinct o1.resu from tmp as o1 join tmp as o2 on o1.result=o2.result where o1.country<>o2.country -group by o1.country, o2.country, o1.type +group by o1.country, o2.country, o1.type; ---- Sprint 4 ---- - create table indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal from publication_datasources pd @@ -335,7 +132,7 @@ select pd.id, 1 as in_diamond_journal from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_pub_hybrid stored as parquet as select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid @@ -345,7 +142,7 @@ select pd.id, 1 as is_hybrid from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_is_gold_oa stored as parquet as (select distinct pd.id, coalesce(gold_oa, 0) as gold_oa @@ -355,8 +152,7 @@ select pd.id, 1 as gold_oa from publication_datasources pd join datasource d on d.id=pd.datasource join 
stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp -on pd.id=tmp.id) - +on pd.id=tmp.id); create table indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative @@ -366,7 +162,7 @@ select pd.id, 1 as is_transformative from publication_datasources pd join datasource d on d.id=pd.datasource join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and ps.is_transformative_journal=true) tmp -on pd.id=tmp.id +on pd.id=tmp.id; create table indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri @@ -376,9 +172,9 @@ join publication p on p.id=ri.id join datasource d on ri.hostedby=d.id where d.type like '%Journal%' and ri.accessright='Closed Access' and (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp -on tmp.id=ri.id +on tmp.id=ri.id; + ---- Sprint 5 ---- - create table indi_result_no_of_copies stored as parquet as -select id, count(id) as number_of_copies from result_instance group by id \ No newline at end of file +select id, count(id) as number_of_copies from result_instance group by id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 1ae459640..e72378f56 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -49,8 +49,10 @@ compute stats TARGET.result_greenoa; create table 
TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_languages; -create table TARGET.result_licences stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.result_licences; +create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_licenses; + +create table TARGET.licenses_normalized as select * from SOURCE.licenses_normalized; create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_oids; @@ -79,6 +81,9 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; +create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or r.id=orig.target); +compute stats TARGET.result_result; + -- datasources create view if not exists TARGET.datasource as select * from SOURCE.datasource; create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids; @@ -100,6 +105,7 @@ create view if not exists TARGET.project as select * from SOURCE.project; create view if not exists TARGET.project_oids as select * from SOURCE.project_oids; create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations; create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount; +create view if not exists TARGET.project_classification as select * from 
SOURCE.project_classification; create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; compute stats TARGET.project_results; @@ -115,8 +121,8 @@ create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.i compute stats TARGET.indi_pub_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; -create table TARGET.indi_result_has_cc_licence_f stored as parquet as select * from SOURCE.indi_result_has_cc_licence_f orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_result_has_cc_licence_f; +create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_result_has_cc_licence; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence_url; @@ -138,6 +144,9 @@ compute stats TARGET.indi_pub_closed_other_open; create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_no_of_copies; +--- Usage statistics +create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); + --denorm alter table TARGET.result rename to TARGET.res_tmp; From 44a937f4ed4bc32211a7a9f6c7218f98fe1488a9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 12:24:52 +0100 Subject: [PATCH 044/162] factored out entity 
grouping implementation, extended to consider results from delegated authorities rather than identical records from other sources --- .../oa/merge}/DispatchEntitiesSparkJob.java | 22 +- .../dhp/oa/merge}/GroupEntitiesSparkJob.java | 46 ++- .../dhp/schema/oaf/utils/OafMapperUtils.java | 18 ++ .../dedup/consistency/oozie_app/workflow.xml | 16 +- .../group/dispatch_entities_parameters.json | 26 ++ .../group_graph_entities_parameters.json | 20 ++ .../graph/group/oozie_app/config-default.xml | 18 ++ .../dhp/oa/graph/group/oozie_app/workflow.xml | 289 ++++++++++++++++++ pom.xml | 2 +- 9 files changed, 412 insertions(+), 45 deletions(-) rename {dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup => dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge}/DispatchEntitiesSparkJob.java (96%) rename {dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup => dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge}/GroupEntitiesSparkJob.java (98%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java similarity index 96% rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index ea738836b..caa9e8106 100644 --- 
a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -1,11 +1,11 @@ -package eu.dnetlib.dhp.oa.dedup; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.util.Objects; -import java.util.Optional; +package eu.dnetlib.dhp.oa.merge; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -17,12 +17,10 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.util.Objects; +import java.util.Optional; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; public class DispatchEntitiesSparkJob { @@ -38,7 +36,7 @@ public class DispatchEntitiesSparkJob { .requireNonNull( DispatchEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); + "/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java similarity index 98% rename from 
dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index a19f86380..771eba873 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -1,15 +1,17 @@ -package eu.dnetlib.dhp.oa.dedup; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; - -import java.io.IOException; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; +package eu.dnetlib.dhp.oa.merge; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.jayway.jsonpath.Configuration; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.Option; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -20,21 +22,17 @@ import org.apache.spark.sql.*; import org.apache.spark.sql.expressions.Aggregator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.jayway.jsonpath.Configuration; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; -import com.jayway.jsonpath.Option; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import 
eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; + /** * Groups the graph content by entity identifier to ensure ID uniqueness */ @@ -53,7 +51,7 @@ public class GroupEntitiesSparkJob { .toString( GroupEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 720fe47fb..bbdd59975 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -47,6 +47,17 @@ public class OafMapperUtils { } public static Result mergeResults(Result left, Result right) { + + final boolean leftFromDeletedAuthority = isFromDeletedAuthority(left); + final boolean rightFromDeletedAuthority = isFromDeletedAuthority(right); + + if (leftFromDeletedAuthority && !rightFromDeletedAuthority) { + return left; + } + if (!leftFromDeletedAuthority && rightFromDeletedAuthority) { + return right; + } + if (new ResultTypeComparator().compare(left, right) < 0) { left.mergeFrom(right); return left; @@ -56,6 +67,13 @@ public class OafMapperUtils { } } + private static boolean isFromDeletedAuthority(Result r) { + return r.getInstance() + .stream() + .map(i -> i.getCollectedfrom().getKey()) + 
.anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)); + } + public static KeyValue keyValue(final String k, final String v) { final KeyValue kv = new KeyValue(); kv.setKey(k); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml index 4ea003926..7c500493f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml @@ -104,7 +104,7 @@ yarn cluster group graph entities - eu.dnetlib.dhp.oa.dedup.GroupEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -138,7 +138,7 @@ yarn cluster Dispatch publications - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -163,7 +163,7 @@ yarn cluster Dispatch project - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -188,7 +188,7 @@ yarn cluster Dispatch organization - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -213,7 +213,7 @@ yarn cluster Dispatch publication - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -238,7 +238,7 @@ yarn cluster Dispatch dataset - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + 
eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -263,7 +263,7 @@ yarn cluster Dispatch software - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -288,7 +288,7 @@ yarn cluster Dispatch otherresearchproduct - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json new file mode 100644 index 000000000..aa8d2a7c2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "i", + "paramLongName": "inputPath", + "paramDescription": "the source path", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "path of the output graph", + "paramRequired": true + }, + { + "paramName": "c", + "paramLongName": "graphTableClassName", + "paramDescription": "the graph entity class name", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json new file mode 100644 index 000000000..e65acb3c4 --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "gin", + "paramLongName": "graphInputPath", + "paramDescription": "the graph root path", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the output merged graph root path", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml new file mode 100644 index 000000000..2e0ed9aee --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml @@ -0,0 +1,18 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml new file mode 100644 index 000000000..883d0e1fb --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml @@ -0,0 +1,289 @@ + + + + graphBasePath + the input graph base path + + + workingPath + path of the working directory + + + graphOutputPath + path of the output graph + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + 
oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + group graph entities + eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=15000 + + --graphInputPath${graphBasePath} + --outputPath${workingPath}/grouped_entities + + + + + + + + + + + + + + + + + + yarn + cluster + Dispatch publications + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf 
spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/datasource + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource + + + + + + + + yarn + cluster + Dispatch project + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/project + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project + + + + + + + + yarn + cluster + Dispatch organization + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/organization + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization + + + + + + + + yarn + cluster + Dispatch publication + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/publication + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + yarn + cluster + Dispatch dataset + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/dataset + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + yarn + cluster + Dispatch software + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/software + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + yarn + cluster + Dispatch otherresearchproduct + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + 
dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/otherresearchproduct + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 7a026e668..86c3b4526 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.24] + [2.10.26-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 62f135262e6d811c3e1c5559ef5370a376a87454 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 12:30:52 +0100 Subject: [PATCH 045/162] code formatting --- .../oa/merge/DispatchEntitiesSparkJob.java | 18 ++++---- .../dhp/oa/merge/GroupEntitiesSparkJob.java | 42 ++++++++++--------- .../dhp/schema/oaf/utils/OafMapperUtils.java | 9 ++-- 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index caa9e8106..d2bc8f45d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.oa.merge; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; +import static 
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -17,10 +17,12 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Objects; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; public class DispatchEntitiesSparkJob { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index 771eba873..91cb543ab 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -1,17 +1,15 @@ package eu.dnetlib.dhp.oa.merge; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.jayway.jsonpath.Configuration; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; -import com.jayway.jsonpath.Option; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + 
import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -22,17 +20,21 @@ import org.apache.spark.sql.*; import org.apache.spark.sql.expressions.Aggregator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.jayway.jsonpath.Configuration; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.Option; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; -import java.io.IOException; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; - /** * Groups the graph content by entity identifier to ensure ID uniqueness */ diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index bbdd59975..2f1fc3a58 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -68,10 +68,11 @@ public class OafMapperUtils { } private static boolean isFromDeletedAuthority(Result r) { - return r.getInstance() - .stream() - .map(i -> i.getCollectedfrom().getKey()) - .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)); + return r + .getInstance() + .stream() + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> 
IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)); } public static KeyValue keyValue(final String k, final String v) { From 391aa1373bbc948a447ce2bdc026d5aacb9eb7ab Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 17:13:21 +0100 Subject: [PATCH 046/162] added unit test --- .../oa/merge/DispatchEntitiesSparkJob.java | 2 +- .../dhp/oa/merge/GroupEntitiesSparkJob.java | 2 +- .../merge}/dispatch_entities_parameters.json | 0 .../group_graph_entities_parameters.json | 0 .../group/dispatch_entities_parameters.json | 26 ------ .../group_graph_entities_parameters.json | 20 ---- .../group/GroupEntitiesSparkJobTest.java | 91 +++++++++++++++++++ .../dhp/oa/graph/group/dataset/dataset.json | 3 + .../graph/group/publication/publication.json | 3 + 9 files changed, 99 insertions(+), 48 deletions(-) rename {dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup => dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge}/dispatch_entities_parameters.json (100%) rename {dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup => dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge}/group_graph_entities_parameters.json (100%) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index d2bc8f45d..3f65d754f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -38,7 +38,7 @@ public class DispatchEntitiesSparkJob { .requireNonNull( DispatchEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json"))); + "/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index 91cb543ab..e652bd5b6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -53,7 +53,7 @@ public class GroupEntitiesSparkJob { .toString( GroupEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json")); + "/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json similarity index 100% rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json 
b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json similarity index 100% rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json deleted file mode 100644 index aa8d2a7c2..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/dispatch_entities_parameters.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after job execution", - "paramRequired": false - }, - { - "paramName": "i", - "paramLongName": "inputPath", - "paramDescription": "the source path", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "path of the output graph", - "paramRequired": true - }, - { - "paramName": "c", - "paramLongName": "graphTableClassName", - "paramDescription": "the graph entity class name", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json deleted file mode 100644 index e65acb3c4..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/group_graph_entities_parameters.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after 
job execution", - "paramRequired": false - }, - { - "paramName": "gin", - "paramLongName": "graphInputPath", - "paramDescription": "the graph root path", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the output merged graph root path", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java new file mode 100644 index 000000000..5214c1783 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -0,0 +1,91 @@ + +package eu.dnetlib.dhp.oa.graph.group; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Result; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +public class GroupEntitiesSparkJobTest { + + private static SparkSession spark; + + private Path workingDir; + //private Path inputDir; + private Path graphInputPath; + + private Path outputPath; + + @BeforeAll + public static void beforeAll() { + SparkConf conf = new SparkConf(); + 
conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); + conf.setMaster("local"); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + spark = SparkSession.builder().config(conf).getOrCreate(); + } + + @BeforeEach + public void beforeEach() throws IOException, URISyntaxException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + //inputDir = workingDir.resolve("input"); + graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + outputPath = workingDir.resolve("output"); + } + + @AfterEach + public void afterEach() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + } + + @AfterAll + public static void afterAll() { + spark.stop(); + } + + @Test + void testGroupEntities() throws Exception { + GroupEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-graphInputPath", + graphInputPath.toString(), + "-outputPath", + outputPath.toString() + }); + + ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + Dataset output = spark + .read() + .textFile(outputPath.toString()) + .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + Assertions.assertEquals( + 1, + output + .filter((FilterFunction) r -> + "50|doi_________::09821844208a5cd6300b2bfb13bca1b9".equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo")) ) + .count()); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json new file mode 100644 index 000000000..e30be47e9 --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary 
sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classif
ication_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_t
ypologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - 
geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages 
D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json new file mode 100644 index 000000000..29ce76df3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, 
B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary 
sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject
_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classifi
cation_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - 
geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages 
D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file From abfa9c60458ba7698c724e681b27db58fd7754cb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 17:17:11 +0100 Subject: [PATCH 047/162] code formatting --- .../group/GroupEntitiesSparkJobTest.java | 129 +++++++++--------- 1 file changed, 65 insertions(+), 64 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java index 5214c1783..729948f52 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.oa.graph.group; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Result; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -17,75 +17,76 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Result; public class GroupEntitiesSparkJobTest { - private static SparkSession spark; + private static SparkSession spark; - private Path workingDir; - //private Path inputDir; - private Path graphInputPath; + private Path workingDir; + private Path graphInputPath; - private Path outputPath; + private Path outputPath; - @BeforeAll - public static void beforeAll() { - SparkConf conf = new SparkConf(); - 
conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); - conf.setMaster("local"); - conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - spark = SparkSession.builder().config(conf).getOrCreate(); - } + @BeforeAll + public static void beforeAll() { + SparkConf conf = new SparkConf(); + conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); + conf.setMaster("local"); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + spark = SparkSession.builder().config(conf).getOrCreate(); + } - @BeforeEach - public void beforeEach() throws IOException, URISyntaxException { - workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); - //inputDir = workingDir.resolve("input"); - graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); - outputPath = workingDir.resolve("output"); - } + @BeforeEach + public void beforeEach() throws IOException, URISyntaxException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + outputPath = workingDir.resolve("output"); + } - @AfterEach - public void afterEach() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - } + @AfterEach + public void afterEach() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + } - @AfterAll - public static void afterAll() { - spark.stop(); - } + @AfterAll + public static void afterAll() { + spark.stop(); + } - @Test - void testGroupEntities() throws Exception { - GroupEntitiesSparkJob.main(new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-graphInputPath", - graphInputPath.toString(), - "-outputPath", - outputPath.toString() - }); + @Test + void 
testGroupEntities() throws Exception { + GroupEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-graphInputPath", + graphInputPath.toString(), + "-outputPath", + outputPath.toString() + }); - ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - Dataset output = spark - .read() - .textFile(outputPath.toString()) - .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) - .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + Dataset output = spark + .read() + .textFile(outputPath.toString()) + .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); - Assertions.assertEquals( - 1, - output - .filter((FilterFunction) r -> - "50|doi_________::09821844208a5cd6300b2bfb13bca1b9".equals(r.getId()) && - r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo")) ) - .count()); - } + Assertions + .assertEquals( + 1, + output + .filter( + (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" + .equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) + .count()); + } } From 3b9020c1b7803d81ed4a9aaf9acd9a7b58f00b23 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 19 Jan 2022 18:15:55 +0100 Subject: [PATCH 048/162] added unit test for the DispatchEntitiesJob --- .../group/GroupEntitiesSparkJobTest.java | 104 +++++++++++++----- 1 file changed, 78 insertions(+), 26 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java index 729948f52..3bd1c13de 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.graph.group; +import static org.junit.jupiter.api.Assertions.*; + import java.io.IOException; import java.net.URISyntaxException; import java.nio.file.Files; @@ -19,22 +21,34 @@ import org.junit.jupiter.api.*; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob; import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.DHPUtils; +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class GroupEntitiesSparkJobTest { private static SparkSession spark; - private Path workingDir; - private Path graphInputPath; + private static ObjectMapper mapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - private Path outputPath; + private static Path workingDir; + private Path dataInputPath; + + private Path groupEntityPath; + private Path dispatchEntityPath; @BeforeAll - public static void beforeAll() { + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + SparkConf conf = new SparkConf(); conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); conf.setMaster("local"); @@ -45,48 +59,86 @@ public class GroupEntitiesSparkJobTest { @BeforeEach public void beforeEach() throws 
IOException, URISyntaxException { - workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); - graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); - outputPath = workingDir.resolve("output"); - } - - @AfterEach - public void afterEach() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); + dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + groupEntityPath = workingDir.resolve("grouped_entity"); + dispatchEntityPath = workingDir.resolve("dispatched_entity"); } @AfterAll - public static void afterAll() { + public static void afterAll() throws IOException { spark.stop(); + FileUtils.deleteDirectory(workingDir.toFile()); } @Test + @Order(1) void testGroupEntities() throws Exception { GroupEntitiesSparkJob.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphInputPath", - graphInputPath.toString(), + dataInputPath.toString(), "-outputPath", - outputPath.toString() + groupEntityPath.toString() }); - ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); Dataset output = spark .read() - .textFile(outputPath.toString()) + .textFile(groupEntityPath.toString()) .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); - Assertions - .assertEquals( - 1, - output - .filter( - (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" - .equals(r.getId()) && - r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) - .count()); + assertEquals( + 1, + output + .filter( + (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" + .equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) + .count()); + } + + @Test + @Order(2) + void 
testDispatchEntities() throws Exception { + for (String type : Lists + .newArrayList( + Publication.class.getCanonicalName(), eu.dnetlib.dhp.schema.oaf.Dataset.class.getCanonicalName())) { + String directory = StringUtils.substringAfterLast(type, ".").toLowerCase(); + DispatchEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + groupEntityPath.toString(), + "-outputPath", + dispatchEntityPath.resolve(directory).toString(), + "-graphTableClassName", + type + }); + } + + Dataset output = spark + .read() + .textFile( + DHPUtils + .toSeq( + HdfsSupport + .listFiles(dispatchEntityPath.toString(), spark.sparkContext().hadoopConfiguration()))) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + assertEquals(3, output.count()); + assertEquals( + 2, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("publication")) + .count()); + assertEquals( + 1, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("dataset")) + .count()); } } From f2fde5566bc9875731911a3717210c691fbbc8e1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 20 Jan 2022 09:19:07 +0100 Subject: [PATCH 049/162] using helper method from ModelSupport to find the inverse relation descriptor --- .../dhp/blacklist/ReadBlacklistFromDB.java | 2 +- .../dhp/blacklist/BlacklistRelationTest.java | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 7d0d6b0b8..380991526 100644 --- 
a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -90,7 +90,7 @@ public class ReadBlacklistFromDB implements Closeable { inverse.setSource(target_direct); String encoding = rs.getString("relationship"); - RelationInverse ri = ModelSupport.relationInverseMap.get(encoding); + RelationInverse ri = ModelSupport.findInverse(encoding); direct.setRelClass(ri.getRelClass()); inverse.setRelClass(ri.getInverseRelClass()); direct.setRelType(ri.getRelType()); diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java new file mode 100644 index 000000000..160658e5b --- /dev/null +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlacklistRelationTest.java @@ -0,0 +1,38 @@ + +package eu.dnetlib.dhp.blacklist; + +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.common.RelationInverse; + +public class BlacklistRelationTest { + + @Test + public void testRelationInverseLookup() { + + final List rels = Arrays + .asList( + "resultResult_relationship_IsRelatedTo", + "resultOrganization_affiliation_isAuthorInstitutionOf", + "resultOrganization_affiliation_hasAuthorInstitution", + "datasourceOrganization_provision_isProvidedBy", + "projectOrganization_participation_hasParticipant", + "resultProject_outcome_produces", + "resultProject_outcome_isProducedBy"); + + rels.forEach(r -> { + RelationInverse inverse = ModelSupport.relationInverseMap.get(r); + Assertions.assertNotNull(inverse); + Assertions.assertNotNull(inverse.getRelType()); + Assertions.assertNotNull(inverse.getSubReltype()); + 
Assertions.assertNotNull(inverse.getRelClass()); + }); + + } + +} From dd52bf1bb8c1b3732405cb5d100d53d5f062d934 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 13:59:29 +0100 Subject: [PATCH 050/162] copy relations to the graphOutputPath --- .../dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml index 883d0e1fb..f77b46105 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml @@ -283,7 +283,16 @@ - + + + + + ${nameNode}/${graphBasePath}/relation + ${nameNode}/${graphOutputPath}/relation + + + + \ No newline at end of file From 2f385b3ac6b7ebdb781ac568cbb0ffa366d9e351 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 13:59:46 +0100 Subject: [PATCH 051/162] updated dnet workflow profile definitions --- .../00_beta_graph_complete_experiment.xml | 111 ++++++++++++----- .../dhp/provision/00_beta_graph_for_IIS.xml | 87 ++++++++++++- .../dhp/provision/00_prod_graph_for_IIS.xml | 53 +++++++- .../dnetlib/dhp/provision/02_beta_graph.xml | 115 ++++++++++++++++-- .../dnetlib/dhp/provision/02_prod_graph.xml | 99 +++++++++++++-- 5 files changed, 398 insertions(+), 67 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml index c10dd4e99..82cf9d3d5 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml +++ 
b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml @@ -4,7 +4,7 @@ - + Graph processing [EXPERIMENT] @@ -15,7 +15,7 @@ set the path of unresolved entities unresolvedEntityPath - /data/unresolved_BETA + /data/unresolved_BETA/content @@ -51,6 +51,16 @@ + + set the number of iteration in affiliation propagation + + iterations + 1 + + + + + Set the target path to store the MERGED graph @@ -91,11 +101,21 @@ + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_experiment/graph/05_graph_grouped + + + + + Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_experiment/graph/05_graph_inferred + /tmp/beta_experiment/graph/06_graph_inferred @@ -105,7 +125,7 @@ Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_experiment/graph/06_graph_dedup + /tmp/beta_experiment/graph/07_graph_dedup @@ -115,7 +135,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_experiment/graph/07_graph_consistent + /tmp/beta_experiment/graph/08_graph_consistent @@ -125,7 +145,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/beta_experiment/graph/08_graph_orcid + /tmp/beta_experiment/graph/09_graph_orcid @@ -135,7 +155,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_experiment/graph/09_graph_bulktagging + /tmp/beta_experiment/graph/10_graph_bulktagging @@ -145,7 +165,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_experiment/graph/10_graph_affiliation + /tmp/beta_experiment/graph/11_graph_affiliation @@ -155,7 +175,7 @@ Set the target path to store the AFFILIATION from SEMATIC RELATION graph affiliationSemRelGraphPath - /tmp/beta_experiment/graph/11_graph_affiliationsr + /tmp/beta_experiment/graph/12_graph_affiliationsr @@ -165,7 +185,7 @@ Set the target path to store the COMMUNITY from 
SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_experiment/graph/12_graph_community_organization + /tmp/beta_experiment/graph/13_graph_community_organization @@ -175,7 +195,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_experiment/graph/13_graph_funding + /tmp/beta_experiment/graph/14_graph_funding @@ -185,7 +205,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_experiment/graph/14_graph_community_sem_rel + /tmp/beta_experiment/graph/15_graph_community_sem_rel @@ -195,7 +215,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_experiment/graph/15_graph_country + /tmp/beta_experiment/graph/16_graph_country @@ -205,7 +225,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_experiment/graph/16_graph_cleaned + /tmp/beta_experiment/graph/17_graph_cleaned @@ -215,7 +235,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_experiment/graph/17_graph_blacklisted + /tmp/beta_experiment/graph/18_graph_blacklisted @@ -548,14 +568,14 @@ 'mongoURL' : 'mongodb://beta.services.openaire.eu', 'mongoDb' : 'mdstore', 'mdstoreManagerUrl' : 'https://beta.services.openaire.eu/mdstoremanager', - 'postgresURL' : '', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '', - 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.40:5432/oa_organizations', 'postgresOpenOrgsUser' : '', 'postgresOpenOrgsPassword' : '', 'shouldHashId' : 'true', - 'importOpenorgs' : 'true', + 'importOpenorgs' : 'false', 'workingDir' : '/tmp/beta_experiment/working_dir/beta_aggregator' } @@ -594,10 +614,10 @@ 'mongoURL' : 'mongodb://services.openaire.eu', 'mongoDb' : 'mdstore', 'mdstoreManagerUrl' : 'https://services.openaire.eu/mdstoremanager', - 'postgresURL' : '', + 
'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '', - 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.39:5432/oa_organizations', 'postgresOpenOrgsUser' : '', 'postgresOpenOrgsPassword' : '', 'shouldHashId' : 'true', @@ -737,11 +757,11 @@ executeOozieJob IIS - { + { 'graphBasePath':'cleanedFirstGraphPath', 'unresolvedPath' :'unresolvedEntityPath', 'targetPath':'resolvedGraphPath' - } + } { @@ -752,6 +772,30 @@ } + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -867,9 +911,9 @@ import_mdstore_service_location import_dataset_mdstore_ids_csv oozie.wf.application.path - /lib/iis/primary/snapshots/2021-09-24 + /lib/iis/primary/snapshots/2021-12-09 IIS - /tmp/beta_inference/graph/07_graph_cleaned + deprecated - not used import_infospace_graph_location import_project_concepts_context_ids_csv @@ -908,7 +952,7 @@ 'import_islookup_service_location' : 'import_islookup_service_location', 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', - 'import_infospace_graph_location' : 'import_infospace_graph_location', + 'import_infospace_graph_location' : 'groupedGraphPath', 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', @@ -958,7 +1002,7 @@ { 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 'resolvedGraphPath', + 'inputGraphRootPath' : 'groupedGraphPath', 'outputGraphRootPath' : 
'inferredGraphPath', 'isLookupUrl' : 'isLookUpUrl' } @@ -1125,7 +1169,8 @@ { 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'affiliationSemRelGraphPath' + 'outputPath': 'affiliationSemRelGraphPath', + 'iterations':'iterations' } @@ -1283,7 +1328,7 @@ { 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', 'workingDir' : '/tmp/beta_experiment/working_dir/blacklist', - 'postgresURL' : '', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '' } @@ -1296,10 +1341,10 @@ - wf_20211206_093743_83 - 2021-12-06T10:12:32+00:00 - SUCCESS - + wf_20220111_200505_785 + 2022-01-11T20:08:53+00:00 + + - + \ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml index 2fed35f44..df9528f4c 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -11,6 +11,16 @@ IIS 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + set blacklist of funder nsPrefixes from the beta aggregator @@ -71,11 +81,31 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_inference/graph/05_graph_grouped + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_inference/graph/04_graph_dedup + /tmp/beta_inference/graph/06_graph_dedup @@ -85,7 +115,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_inference/graph/05_graph_consistent + 
/tmp/beta_inference/graph/07_graph_consistent @@ -95,7 +125,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_inference/graph/06_graph_cleaned + /tmp/beta_inference/graph/08_graph_cleaned @@ -548,6 +578,55 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -560,7 +639,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'groupedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml index e5ce3d710..0ea6be341 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -11,6 +11,16 @@ IIS 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_PROD/content + + + + + set blacklist of funder nsPrefixes @@ -61,11 +71,21 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + 
/tmp/beta_inference/graph/03_graph_resolved + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/prod_inference/graph/03_graph_dedup + /tmp/prod_inference/graph/04_graph_dedup @@ -75,7 +95,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/prod_inference/graph/04_graph_consistent + /tmp/prod_inference/graph/05_graph_consistent @@ -85,7 +105,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/prod_inference/graph/05_graph_cleaned + /tmp/prod_inference/graph/06_graph_cleaned @@ -347,6 +367,31 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + @@ -359,7 +404,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index f83337b3c..73c44aba8 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -11,6 +11,16 @@ Data Provision 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + set blacklist of funder nsPrefixes from the beta aggregator @@ -71,11 +81,31 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + 
/tmp/beta_provision/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_provision/graph/05_graph_grouped + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_provision/graph/04_graph_dedup + /tmp/beta_provision/graph/06_graph_dedup @@ -85,7 +115,7 @@ Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_provision/graph/05_graph_inferred + /tmp/beta_provision/graph/07_graph_inferred @@ -95,7 +125,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_provision/graph/06_graph_consistent + /tmp/beta_provision/graph/08_graph_consistent @@ -105,7 +135,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/beta_provision/graph/07_graph_orcid + /tmp/beta_provision/graph/09_graph_orcid @@ -115,7 +145,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_provision/graph/08_graph_bulktagging + /tmp/beta_provision/graph/10_graph_bulktagging @@ -125,7 +155,17 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_provision/graph/09_graph_affiliation + /tmp/beta_provision/graph/11_graph_affiliation + + + + + + + Set the target path to store the AFFILIATION from SEMATIC RELATION graph + + affiliationSemRelGraphPath + /tmp/beta_provision/graph/12_graph_affiliationsr @@ -135,7 +175,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_provision/graph/10_graph_comunity_organization + /tmp/beta_provision/graph/13_graph_comunity_organization @@ -145,7 +185,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_provision/graph/11_graph_funding + /tmp/beta_provision/graph/14_graph_funding @@ -155,7 +195,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - 
/tmp/beta_provision/graph/12_graph_comunity_sem_rel + /tmp/beta_provision/graph/15_graph_comunity_sem_rel @@ -165,7 +205,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_provision/graph/13_graph_country + /tmp/beta_provision/graph/16_graph_country @@ -175,7 +215,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_provision/graph/14_graph_cleaned + /tmp/beta_provision/graph/17_graph_cleaned @@ -185,7 +225,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_provision/graph/15_graph_blacklisted + /tmp/beta_provision/graph/18_graph_blacklisted @@ -695,6 +735,55 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -707,7 +796,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'groupedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml index be6155f2f..205db29a6 100644 --- 
a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -11,6 +11,16 @@ Data Provision 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_PROD/content + + + + + set blacklist of funder nsPrefixes @@ -51,11 +61,21 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/03_graph_resolved + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/prod_provision/graph/03_graph_dedup + /tmp/prod_provision/graph/04_graph_dedup @@ -65,7 +85,7 @@ Set the target path to store the INFERRED graph inferredGraphPath - /tmp/prod_provision/graph/04_graph_inferred + /tmp/prod_provision/graph/05_graph_inferred @@ -75,7 +95,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/prod_provision/graph/05_graph_consistent + /tmp/prod_provision/graph/06_graph_consistent @@ -85,7 +105,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/prod_provision/graph/06_graph_orcid + /tmp/prod_provision/graph/07_graph_orcid @@ -95,7 +115,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/prod_provision/graph/07_graph_bulktagging + /tmp/prod_provision/graph/08_graph_bulktagging @@ -105,7 +125,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/prod_provision/graph/08_graph_affiliation + /tmp/prod_provision/graph/09_graph_affiliation @@ -115,7 +135,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/prod_provision/graph/09_graph_comunity_organization + /tmp/prod_provision/graph/10_graph_comunity_organization @@ -125,7 +145,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - 
/tmp/prod_provision/graph/10_graph_funding + /tmp/prod_provision/graph/11_graph_funding @@ -135,7 +155,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/prod_provision/graph/11_graph_comunity_sem_rel + /tmp/prod_provision/graph/12_graph_comunity_sem_rel @@ -145,7 +165,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/prod_provision/graph/12_graph_country + /tmp/prod_provision/graph/13_graph_country @@ -155,7 +175,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/prod_provision/graph/13_graph_cleaned + /tmp/prod_provision/graph/14_graph_cleaned @@ -165,7 +185,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/prod_provision/graph/14_graph_blacklisted + /tmp/prod_provision/graph/15_graph_blacklisted @@ -446,6 +466,59 @@ build-report + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'prepareInfo', + 'hostedByMapPath' : '/user/dnet.production/data/hostedByMap', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap', + 'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + Graph resolution + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution', + 'shouldResolveEntities' : 'false', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '9G' + } + + @@ -458,7 +531,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 
'cleanedFirstGraphPath', + 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } From c42623f0066272088eef70a831ddceb9dbf365a0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 14:30:09 +0100 Subject: [PATCH 052/162] added NPE checks --- .../dhp/schema/oaf/utils/OafMapperUtils.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 2f1fc3a58..1ffc66dfd 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -48,8 +48,8 @@ public class OafMapperUtils { public static Result mergeResults(Result left, Result right) { - final boolean leftFromDeletedAuthority = isFromDeletedAuthority(left); - final boolean rightFromDeletedAuthority = isFromDeletedAuthority(right); + final boolean leftFromDeletedAuthority = isFromDelegatedAuthority(left); + final boolean rightFromDeletedAuthority = isFromDelegatedAuthority(right); if (leftFromDeletedAuthority && !rightFromDeletedAuthority) { return left; @@ -67,12 +67,16 @@ public class OafMapperUtils { } } - private static boolean isFromDeletedAuthority(Result r) { - return r - .getInstance() - .stream() - .map(i -> i.getCollectedfrom().getKey()) - .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)); + private static boolean isFromDelegatedAuthority(Result r) { + return Optional + .ofNullable(r.getInstance()) + .map( + instance -> instance + .stream() + .filter(i -> Objects.nonNull(i.getCollectedfrom())) + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) + .orElse(false); } public static KeyValue keyValue(final String k, final String v) { 
From 8de97883083bf2608a084840ab1f3e03f4244e05 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 24 Jan 2022 11:29:22 +0100 Subject: [PATCH 053/162] applied fix for avoiding ruling out the invisible (APC) records during the graph cleaning --- .../oaf/utils/GraphCleaningFunctions.java | 16 + .../clean/GraphCleaningFunctionsTest.java | 47 + .../dhp/oa/graph/clean/result_invisible.json | 958 ++++++++++++++++++ .../graph/clean/result_missing_invisible.json | 922 +++++++++++++++++ 4 files changed, 1943 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index f49a47c0a..48e35a342 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -88,6 +88,22 @@ public class GraphCleaningFunctions extends CleaningFunctions { } public static boolean filter(T value) { + if (Boolean.TRUE + .equals( + Optional + .ofNullable(value) + .map( + o -> Optional + .ofNullable(o.getDataInfo()) + .map( + d -> Optional + .ofNullable(d.getInvisible()) + .orElse(true)) + .orElse(true)) + .orElse(true))) { + return true; + } + if (value instanceof Datasource) { // nothing to evaluate here } else if (value instanceof Project) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index 0e9efed21..ebd8cb55c 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -71,6 +71,53 @@ public class GraphCleaningFunctionsTest { } } + @Test + void testFilter_invisible_true() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(true, GraphCleaningFunctions.filter(p_in)); + } + + @Test + void testFilter_true_nothing_to_filter() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(true, GraphCleaningFunctions.filter(p_in)); + } + + @Test + void testFilter_missing_invisible() throws Exception { + + assertNotNull(vocabularies); + assertNotNull(mapping); + + String json = IOUtils + .toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json")); + Publication p_in = MAPPER.readValue(json, Publication.class); + + assertTrue(p_in instanceof Result); + assertTrue(p_in instanceof Publication); + + assertEquals(true, GraphCleaningFunctions.filter(p_in)); + } + @Test void testCleaning() throws Exception { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json new file mode 100644 index 000000000..ffcb187c1 --- 
/dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_invisible.json @@ -0,0 +1,958 @@ +{ + "author": [ + { + "affiliation": [ + ], + "fullname": "Brien, Tom", + "name": "Tom", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "https://orcid.org/0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "orcid", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + } + ], + "rank": 1, + "surname": "Brien" + }, + { + "affiliation": [ + ], + "fullname": "Ade, Peter", + "name": "Peter", + "pid": [ + { + 
"dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "xyz", + "classname": "XYZ", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "qwerty" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID", + "classname": "ORCID", + "schemeid": "", + "schemename": "" + }, + "value": "asdasd" + } + ], + "rank": 2, + "surname": "Ade" + }, + { + "affiliation": [ + ], + "fullname": "Barry, Peter S.", + "name": "Peter S.", + "pid": null, + "rank": 3, + "surname": "Barry" + }, + { + "affiliation": [ + ], + "fullname": "Dunscombe, Chris J.", + "name": "Chris J.", + "pid": [ + ], + "rank": 4, + "surname": "Dunscombe" + }, + { + "affiliation": [ + ], + "fullname": "Leadley, David R.", + "name": "David R.", + "pid": [ + ], + "rank": 5, + "surname": "Leadley" + }, + { + "affiliation": [ + ], + "fullname": "Morozov, Dmitry V.", + "name": "Dmitry V.", + "pid": [ + ], + "rank": 6, + "surname": "Morozov" + }, + { + "affiliation": [ + ], + "fullname": "Myronov, Maksym", + "name": "Maksym", + "pid": [ + ], + "rank": 7, + "surname": "Myronov" + }, + { + "affiliation": [ + ], + "fullname": "Parker, Evan", + "name": "Evan", + "pid": [ + ], + "rank": 8, + "surname": "Parker" + }, + { + "affiliation": [ + ], + "fullname": "Prest, Martin J.", + "name": "Martin J.", + "pid": [ + ], + "rank": 9, + 
"surname": "Prest" + }, + { + "affiliation": [ + ], + "fullname": "Prunnila, Mika", + "name": "Mika", + "pid": [ + ], + "rank": 10, + "surname": "Prunnila" + }, + { + "affiliation": [ + ], + "fullname": "Sudiwala, Rashmi V.", + "name": "Rashmi V.", + "pid": [ + ], + "rank": 11, + "surname": "Sudiwala" + }, + { + "affiliation": [ + ], + "fullname": "Whall, Terry E.", + "name": "Terry E.", + "pid": [ + ], + "rank": 12, + "surname": "Whall" + }, + { + "affiliation": [ + ], + "fullname": "Mauskopf", + "name": "", + "pid": [ + ], + "rank": 13, + "surname": "" + }, + { + "affiliation": [ + ], + "fullname": " P. D. ", + "name": "", + "pid": [ + ], + "rank": 14, + "surname": "" + } + ], + "bestaccessright": null, + "publisher": { + "value": null + }, + "collectedfrom": [ + { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + } + ], + "context": [ + ], + "contributor": [ + ], + "country": [ + { + "classid": "DE", + "classname": "DE", + "schemeid": "dnet:countries", + "schemename": "dnet:countries" + } + ], + "coverage": [ + ], + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": true, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "7 oct 1970" + }, + "dateofcollection": "", + "dateoftransformation": "2020-04-22T12:34:08.009Z", + "description": [ + ], + "externalReference": [ + ], + "extraInfo": [ + ], + "format": [ + 
], + "fulltext": [ + ], + "id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375", + "instance": [ + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Comment/debate", + "classname": "Comment/debate", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://juuli.fi/Record/0275158616", + "http://dx.doi.org/10.1007/s109090161569x" + ] + } + ], + "journal": { + 
"dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "edition": "", + "ep": " 7", + "iss": "9 March", + "issnLinking": "", + "issnOnline": "", + "issnPrinted": "0022-2291", + "name": "Journal of Low Temperature Physics - Early Acces", + "sp": "1 ", + "vol": "" + }, + "language": { + "classid": "UNKNOWN", + "classname": "UNKNOWN", + "schemeid": "dnet:languages", + "schemename": "dnet:languages" + }, + "lastupdatetimestamp": 1591283286319, + "oaiprovenance": { + "originDescription": { + "altered": true, + "baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif", + "datestamp": "2019-07-30", + "harvestDate": "2020-04-22T11:04:38.685Z", + "identifier": "oai:virta-jtp.csc.fi:Publications/0275158616", + "metadataNamespace": "" + } + }, + "originalId": [ + "CSC_________::2250a70c903c6ac6e4c01438259e9375" + ], + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + 
"trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "" + } + ], + "relevantdate": [ + ], + "resourcetype": { + "classid": "0001", + "classname": "0001", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "source": [ + ], + "subject": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "ta213" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" 
+ }, + "value": "infrared detectors" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "lens antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "slot antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "strained silicon" + }, + { + 
"dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "cold electron bolometers" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "doped silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "measure noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "noise equivalent power" + }, + { + "dataInfo": { + 
"deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical characterisation" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical response" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "photon noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon absorbers" + } + ], + "title": [ + { + "dataInfo": { + "deletedbyinference": 
false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "Optical response of strained- and unstrained-silicon cold-electron bolometers test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "test test 123 test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "omic" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + 
"trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "-" + } + ] +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json new file mode 100644 index 000000000..c0b5a959a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result_missing_invisible.json @@ -0,0 +1,922 @@ +{ + "author": [ + { + "affiliation": [], + "fullname": "Brien, Tom", + "name": "Tom", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + 
"invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "https://orcid.org/0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "orcid", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + } + ], + "rank": 1, + "surname": "Brien" + }, + { + "affiliation": [], + "fullname": "Ade, Peter", + "name": "Peter", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "xyz", + "classname": "XYZ", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "qwerty" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + 
"classid": "ORCID", + "classname": "ORCID", + "schemeid": "", + "schemename": "" + }, + "value": "asdasd" + } + ], + "rank": 2, + "surname": "Ade" + }, + { + "affiliation": [], + "fullname": "Barry, Peter S.", + "name": "Peter S.", + "pid": null, + "rank": 3, + "surname": "Barry" + }, + { + "affiliation": [], + "fullname": "Dunscombe, Chris J.", + "name": "Chris J.", + "pid": [], + "rank": 4, + "surname": "Dunscombe" + }, + { + "affiliation": [], + "fullname": "Leadley, David R.", + "name": "David R.", + "pid": [], + "rank": 5, + "surname": "Leadley" + }, + { + "affiliation": [], + "fullname": "Morozov, Dmitry V.", + "name": "Dmitry V.", + "pid": [], + "rank": 6, + "surname": "Morozov" + }, + { + "affiliation": [], + "fullname": "Myronov, Maksym", + "name": "Maksym", + "pid": [], + "rank": 7, + "surname": "Myronov" + }, + { + "affiliation": [], + "fullname": "Parker, Evan", + "name": "Evan", + "pid": [], + "rank": 8, + "surname": "Parker" + }, + { + "affiliation": [], + "fullname": "Prest, Martin J.", + "name": "Martin J.", + "pid": [], + "rank": 9, + "surname": "Prest" + }, + { + "affiliation": [], + "fullname": "Prunnila, Mika", + "name": "Mika", + "pid": [], + "rank": 10, + "surname": "Prunnila" + }, + { + "affiliation": [], + "fullname": "Sudiwala, Rashmi V.", + "name": "Rashmi V.", + "pid": [], + "rank": 11, + "surname": "Sudiwala" + }, + { + "affiliation": [], + "fullname": "Whall, Terry E.", + "name": "Terry E.", + "pid": [], + "rank": 12, + "surname": "Whall" + }, + { + "affiliation": [], + "fullname": "Mauskopf", + "name": "", + "pid": [], + "rank": 13, + "surname": "" + }, + { + "affiliation": [], + "fullname": " P. D. 
", + "name": "", + "pid": [], + "rank": 14, + "surname": "" + } + ], + "bestaccessright": null, + "publisher": { + "value": null + }, + "collectedfrom": [ + { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + } + ], + "context": [], + "contributor": [], + "country": [ + { + "classid": "DE", + "classname": "DE", + "schemeid": "dnet:countries", + "schemename": "dnet:countries" + } + ], + "coverage": [], + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "7 oct 1970" + }, + "dateofcollection": "", + "dateoftransformation": "2020-04-22T12:34:08.009Z", + "description": [], + "externalReference": [], + "extraInfo": [], + "format": [], + "fulltext": [], + "id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375", + "instance": [ + { + "pid": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1008/abcd" + } + ], + "alternateIdentifier": [ + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": 
"dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": null, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1009/qwerty" + } + ], + "accessright": { + "classid": "CLOSED", + "classname": "CLOSED", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "value": "2016-01-01" + }, + "distributionlocation": "", + "hostedby": { + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "Comment/debate", + "classname": "Comment/debate", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "url": [ + "http://juuli.fi/Record/0275158616", + "http://dx.doi.org/10.1007/s109090161569x" + ] + } + ], + "journal": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "edition": "", + "ep": " 7", + "iss": "9 March", + "issnLinking": "", + "issnOnline": "", + "issnPrinted": "0022-2291", + "name": "Journal of Low Temperature Physics - Early Acces", + "sp": "1 ", + "vol": "" + }, + "language": { + "classid": "UNKNOWN", + "classname": "UNKNOWN", + "schemeid": 
"dnet:languages", + "schemename": "dnet:languages" + }, + "lastupdatetimestamp": 1591283286319, + "oaiprovenance": { + "originDescription": { + "altered": true, + "baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif", + "datestamp": "2019-07-30", + "harvestDate": "2020-04-22T11:04:38.685Z", + "identifier": "oai:virta-jtp.csc.fi:Publications/0275158616", + "metadataNamespace": "" + } + }, + "originalId": [ + "CSC_________::2250a70c903c6ac6e4c01438259e9375" + ], + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1007/s109090161569x" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + 
"schemename": "dnet:pid_types" + }, + "value": "" + } + ], + "relevantdate": [], + "resourcetype": { + "classid": "0001", + "classname": "0001", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "source": [], + "subject": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "ta213" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "infrared detectors" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "lens antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": 
false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "slot antennas" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "strained silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "cold electron bolometers" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": 
"sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "doped silicon" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "measure noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "noise equivalent power" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical characterisation" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": 
"sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "optical response" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "photon noise" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "value": "silicon absorbers" + } + ], + "title": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "Optical response of strained- and unstrained-silicon cold-electron bolometers test" + }, + { + "dataInfo": { + "deletedbyinference": false, + 
"inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "test test 123 test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "omic" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + 
"qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "-" + } + ] +} \ No newline at end of file From 97ad94d7d9187396507b26b2b124b143197e0d11 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 24 Jan 2022 18:02:07 +0100 Subject: [PATCH 054/162] [graph resolution] drop output path at the beginning --- .../dhp/oa/graph/resolution/oozie_app/workflow.xml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 3cd08bc9b..74e792f07 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -19,12 +19,21 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + yarn From 59a250337c473f6153e3e3a1771f47dd6830690f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 24 Jan 2022 18:02:07 +0100 Subject: [PATCH 055/162] [graph resolution] drop output path at the beginning --- .../dhp/oa/graph/resolution/oozie_app/workflow.xml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 3cd08bc9b..74e792f07 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -19,12 +19,21 @@ - + Action 
failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + yarn From 4fc44edb71e8426040821fd919f0759a46d7cac3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 Jan 2022 16:03:57 +0100 Subject: [PATCH 056/162] depending on dhp-schemas:2.10.26 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 86c3b4526..a40942696 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.26-SNAPSHOT] + [2.10.26] [4.0.3] [6.0.5] [3.1.6] From af61e44accbd8b04b487892dc2ff03e2dfe269ae Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 Jan 2022 16:19:14 +0100 Subject: [PATCH 057/162] ported changes to the GraphCleaningFunctionsTest from 8de97883083bf2608a084840ab1f3e03f4244e05 --- .../dhp/oa/graph/clean/GraphCleaningFunctionsTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index a7c7eb810..7c39efb40 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -75,7 +75,7 @@ public class GraphCleaningFunctionsTest { } @Test - void testFilter_false() throws Exception { + void testFilter_invisible_true() throws Exception { assertNotNull(vocabularies); assertNotNull(mapping); @@ -87,11 +87,11 @@ public class GraphCleaningFunctionsTest { assertTrue(p_in instanceof Result); assertTrue(p_in instanceof Publication); - assertEquals(false, GraphCleaningFunctions.filter(p_in)); + assertEquals(true, GraphCleaningFunctions.filter(p_in)); } @Test - void testFilter_true() throws Exception { + void testFilter_true_nothing_to_filter() throws Exception { 
assertNotNull(vocabularies); assertNotNull(mapping); From db299dd8abc48255a7197c8fb07f6b9fdfe47ebb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 Jan 2022 16:24:06 +0100 Subject: [PATCH 058/162] fixed typo --- .../eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 1ffc66dfd..0a51e8600 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -48,13 +48,13 @@ public class OafMapperUtils { public static Result mergeResults(Result left, Result right) { - final boolean leftFromDeletedAuthority = isFromDelegatedAuthority(left); - final boolean rightFromDeletedAuthority = isFromDelegatedAuthority(right); + final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left); + final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right); - if (leftFromDeletedAuthority && !rightFromDeletedAuthority) { + if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) { return left; } - if (!leftFromDeletedAuthority && rightFromDeletedAuthority) { + if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) { return right; } From 8eb75ca16963286ca4585c0db5dcc96c4ac98ab5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 Jan 2022 16:24:37 +0100 Subject: [PATCH 059/162] adapted GenerateEntitiesApplicationTest behaviour --- .../dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 
67490a470..53b3f8432 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -51,8 +51,8 @@ class GenerateEntitiesApplicationTest { Result software = getResult("odf_software.xml", Software.class); Result orp = getResult("oaf_orp.xml", OtherResearchProduct.class); - verifyMerge(publication, dataset, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); - verifyMerge(dataset, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); + verifyMerge(publication, dataset, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); + verifyMerge(dataset, publication, Dataset.class, ModelConstants.DATASET_RESULTTYPE_CLASSID); verifyMerge(publication, software, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); verifyMerge(software, publication, Publication.class, ModelConstants.PUBLICATION_RESULTTYPE_CLASSID); From dce7f5fea8de2b34bb3ff6ce11ac6a0081eb0f29 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 31 Jan 2022 08:20:28 +0100 Subject: [PATCH 060/162] [BULK TAGGING] changed to fix issue that should have been fixed already --- .../dhp/bulktag/community/ResultTagger.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index c8b1bc8fe..1b625494a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -95,14 +95,14 @@ public class ResultTagger implements Serializable { } - result - .getInstance() - .stream() - .map(i -> new 
Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey())) - .flatMap(p -> Stream.of(p.getFst(), p.getSnd())) - .map(s -> StringUtils.substringAfter(s, "|")) - .collect(Collectors.toCollection(HashSet::new)) - .forEach( +// result +// .getInstance() +// .stream() +// .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey())) +// .flatMap(p -> Stream.of(p.getFst(), p.getSnd())) +// .map(s -> StringUtils.substringAfter(s, "|")) +// .collect(Collectors.toCollection(HashSet::new)) + tmp.forEach( dsId -> datasources .addAll( conf.getCommunityForDatasource(dsId, param))); From 73eba34d425f7d23b8ae6fb260469bd8cdd61cb7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 1 Feb 2022 08:38:41 +0100 Subject: [PATCH 061/162] [UnresolvedEntities] Changed the way to merge the unresolved because the new merge removed the dataInfo from the merged result. Added also data info for subjects --- .../PrepareFOSSparkJob.java | 16 +++ .../PrepareSDGSparkJob.java | 15 +++ .../SparkSaveUnresolved.java | 15 ++- .../createunresolvedentities/PrepareTest.java | 111 ++++++++++-------- .../createunresolvedentities/ProduceTest.java | 3 + 5 files changed, 107 insertions(+), 53 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index fef796515..55e391932 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -21,8 +21,10 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import 
eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareFOSSparkJob implements Serializable { @@ -71,6 +73,7 @@ public class PrepareFOSSparkJob implements Serializable { Result r = new Result(); FOSDataModel first = it.next(); r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); + HashSet level1 = new HashSet<>(); HashSet level2 = new HashSet<>(); HashSet level3 = new HashSet<>(); @@ -81,6 +84,19 @@ public class PrepareFOSSparkJob implements Serializable { level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); r.setSubject(sbjs); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java index 27da77c0c..a31e380fe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java @@ -21,8 +21,10 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import 
eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareSDGSparkJob implements Serializable { @@ -78,6 +80,19 @@ public class PrepareSDGSparkJob implements Serializable { s -> sbjs .add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); r.setSubject(sbjs); + r + .setDataInfo( + OafMapperUtils + .dataInfo( + false, null, true, + false, + OafMapperUtils + .qualifier( + ModelConstants.PROVENANCE_ENRICH, + null, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + null)); return r; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index ab8356836..3b9775094 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -67,7 +68,19 @@ public class SparkSaveUnresolved implements Serializable { .groupByKey((MapFunction) Result::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = it.next(); - it.forEachRemaining(r -> ret.mergeFrom(r)); + it.forEachRemaining(r -> { + if (r.getInstance() != null) { + ret.setInstance(r.getInstance()); + } + if (r.getSubject() != null) { + if (ret.getSubject() != null) + 
ret.getSubject().addAll(r.getSubject()); + else + ret.setSubject(r.getSubject()); + } + + // ret.mergeFrom(r) + }); return ret; }, Encoders.bean(Result.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index 166430c2f..cc8108bde 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -146,6 +146,11 @@ public class PrepareTest { .get(0) .getValue()); + final String doi2 = "unresolved::10.3390/s18072310::doi"; + + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count()); + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); + } @Test @@ -259,59 +264,61 @@ public class PrepareTest { .collect() .contains("8. 
Economic growth")); - } - - @Test - void test3() throws Exception { - final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz"; - - final String outputPath = workingDir.toString() + "/fos.json"; - GetFOSSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - - "-outputPath", outputPath - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(outputPath) - .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); - - tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); - tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null)); - tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null)); - tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); + Assertions.assertEquals(32, tmp.filter(row -> row.getDataInfo() != null).count()); } - @Test - void test4() throws Exception { - final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz"; - - final String outputPath = workingDir.toString() + "/sdg.json"; - GetSDGSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - - "-outputPath", outputPath - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(outputPath) - .map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class)); - - tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); - tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null)); - - } +// @Test +// void test3() throws Exception { +// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz"; +// +// final String outputPath = workingDir.toString() + "/fos.json"; +// GetFOSSparkJob +// .main( +// new String[] { +// "--isSparkSessionManaged", Boolean.FALSE.toString(), +// "--sourcePath", 
sourcePath, +// +// "-outputPath", outputPath +// +// }); +// +// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); +// +// JavaRDD tmp = sc +// .textFile(outputPath) +// .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); +// +// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); +// +// } +// +// @Test +// void test4() throws Exception { +// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz"; +// +// final String outputPath = workingDir.toString() + "/sdg.json"; +// GetSDGSparkJob +// .main( +// new String[] { +// "--isSparkSessionManaged", Boolean.FALSE.toString(), +// "--sourcePath", sourcePath, +// +// "-outputPath", outputPath +// +// }); +// +// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); +// +// JavaRDD tmp = sc +// .textFile(outputPath) +// .map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class)); +// +// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); +// tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null)); +// +// } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index ce44f0036..a5ecaeabf 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -196,6 +196,9 @@ public class ProduceTest { final String doi = "unresolved::10.3390/s18072310::doi"; JavaRDD tmp = getResultJavaRDD(); + tmp + .filter(row -> 
row.getId().equals(doi)) + .foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); Assertions .assertEquals( 3, tmp From 37784209c98fbd8d9b6c309334da259419301bb1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Feb 2022 12:46:31 +0100 Subject: [PATCH 062/162] [dhp-schemas-] updated the version of dhp-schema to 2.10.27 for APC name and id modification --- .../java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java | 3 ++- pom.xml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 1b625494a..71a2b3525 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -102,7 +102,8 @@ public class ResultTagger implements Serializable { // .flatMap(p -> Stream.of(p.getFst(), p.getSnd())) // .map(s -> StringUtils.substringAfter(s, "|")) // .collect(Collectors.toCollection(HashSet::new)) - tmp.forEach( + tmp + .forEach( dsId -> datasources .addAll( conf.getCommunityForDatasource(dsId, param))); diff --git a/pom.xml b/pom.xml index a40942696..489f712b0 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.26] + [2.10.27] [4.0.3] [6.0.5] [3.1.6] From 2e215abfa8e30a80e1bc198e474494439f8b5e0b Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 2 Feb 2022 17:27:44 +0100 Subject: [PATCH 063/162] test for instances with URLs for OpenAPC --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 15 +++++++ .../dnetlib/dhp/oa/graph/raw/oaf_openapc.xml | 45 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 6a3414b7c..6a5fd33b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -814,6 +814,21 @@ class MappersTest { } } + @Test + void testOpenAPC() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_openapc.xml"))); + final List list = new OafToOafMapper(vocs, true, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertTrue(p.getInstance().size() > 0); + + assertEquals("https://doi.org/10.1155/2015/439379", p.getInstance().get(0).getUrl().get(0)); + } + private void assertValidId(final String id) { // System.out.println(id); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml index e69de29bb..4a326a21e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_openapc.xml @@ -0,0 +1,45 @@ + + + + openapc_____::000023f9cb6e3a247c764daec4273cbc + 10.1155/2015/439379 + 2022-02-01T15:26:33.817Z + openapc_____ + 2022-02-02T15:45:32.502Z + + + https://doi.org/10.1155/2015/439379 + 10.1155/2015/439379 + PMC4354964 + 25811027.0 + UCL + UCL + 1721.47 + BioMed Research International + http://creativecommons.org/licenses/by/3.0/ + 2015 + 0004 + OPEN + open access + + + + + + false + false + 0.9 + + + + + \ No newline at end of file From dbd2646d59f5bd7435be004656230ce1421234d1 Mon Sep 17 
00:00:00 2001 From: Antonis Lempesis Date: Thu, 3 Feb 2022 12:37:10 +0200 Subject: [PATCH 064/162] fixed the result_result creation for monitor --- .../graph/stats/oozie_app/scripts/step13.sql | 14 ++++++------- .../graph/stats/oozie_app/scripts/step14.sql | 10 +++++----- .../graph/stats/oozie_app/scripts/step15.sql | 8 ++++---- .../graph/stats/oozie_app/scripts/step2.sql | 20 +++++++++---------- .../scripts/step20-createMonitorDB.sql | 2 +- .../graph/stats/oozie_app/scripts/step3.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step4.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step5.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step6.sql | 12 ++++++----- .../graph/stats/oozie_app/scripts/step7.sql | 2 +- .../graph/stats/oozie_app/scripts/step8.sql | 13 ++++++------ .../graph/stats/oozie_app/scripts/step9.sql | 2 +- 12 files changed, 73 insertions(+), 70 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index 315d296fc..947c91072 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -14,7 +14,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -25,7 +25,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on 
p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -36,7 +36,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -47,7 +47,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS SELECT * FROM ${stats_db_name}.publication_sources @@ -76,8 +76,8 @@ join ${openaire_db_name}.result r1 on rel.source=r1.id join ${openaire_db_name}.result r2 on r2.id=rel.target where reltype='resultResult' and r1.resulttype.classname!=r2.resulttype.classname - and r1.datainfo.deletedbyinference=false - and r2.datainfo.deletedbyinference=false + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE and r1.resulttype.classname != 'other' and r2.resulttype.classname != 'other' - and rel.datainfo.deletedbyinference=false; \ No newline at end of file + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index 00a6913bc..e3a33a893 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -8,22 +8,22 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses -where 
licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses @@ -46,7 +46,7 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 8e66e05c0..f46b65171 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -9,22 +9,22 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF 
NOT EXISTS ${stats_db_name}.software_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as select * from ${stats_db_name}.publication_refereed diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index bb0d0ac6c..3d7a53a1c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -38,13 +38,13 @@ SELECT substr(p.id, 4) as id, case when size(p.description) > 0 then true else false end as abstract, 'publication' as type from ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE 
${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, case @@ -53,45 +53,45 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_datasources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and p.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid -where 
p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and p.datainfo.deletedbyinference = false; \ No newline at end of file + and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index e72378f56..2528e904d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -81,7 +81,7 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or 
r.id=orig.target); +create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source) or exists (select 1 from TARGET.result r where r.id=orig.target); compute stats TARGET.result_result; -- datasources diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql index 953eaad6a..76a5e5a48 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -38,20 +38,20 @@ SELECT substr(d.id, 4) AS id, CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract, 'dataset' AS type FROM ${openaire_db_name}.dataset d -WHERE d.datainfo.deletedbyinference = FALSE; +WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and d.datainfo.deletedbyinference = false; + and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, case @@ -60,7 +60,7 @@ 
SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_datasources AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource @@ -68,31 +68,31 @@ FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE 
${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql index 0210dc8cb..dc71f41f1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -38,20 +38,20 @@ SELECT substr(s.id, 4) as id, CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract, 'software' as type from ${openaire_db_name}.software s -where s.datainfo.deletedbyinference = false; +where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and s.datainfo.deletedbyinference = false; + and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and 
p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) as id, case @@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource @@ -68,31 +68,31 @@ FROM ( SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) instances AS instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid 
-where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql index f7b302186..353aa98b7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -37,19 +37,19 @@ SELECT substr(o.id, 4) AS id, CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract, 'other' AS type FROM ${openaire_db_name}.otherresearchproduct o -WHERE o.datainfo.deletedbyinference = FALSE; +WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; -- Otherresearchproduct_citations CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and o.datainfo.deletedbyinference = false; + and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type 
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) as id, case @@ -57,33 +57,33 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN(SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM 
${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 378e0f17b..b5e2eb37b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -5,24 +5,26 @@ ------------------------------------------------------ CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid -FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids; +FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids +where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; + CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) 
AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype = 'projectOrganization' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.project_results AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; create table ${stats_db_name}.project_classification as select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 from ${openaire_db_name}.project p lateral view explode(p.h2020classification) classifs as class -where p.datainfo.deletedbyinference=false and class.h2020programme is not null; +where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; CREATE TABLE ${stats_db_name}.project_tmp ( @@ -72,7 +74,7 @@ SELECT substr(p.id, 4) AS id, p.code.value AS code, p.totalcost AS totalcost FROM ${openaire_db_name}.project p -WHERE p.datainfo.deletedbyinference = false; +WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; create table ${stats_db_name}.funder as select distinct xpath_string(fund, '//funder/id') as id, diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index b3cbc9b41..a57966abf 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -127,7 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization 
AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultOrganization' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 76d31eb5e..d3935fd4a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -44,7 +44,7 @@ FROM ${openaire_db_name}.datasource d1 LATERAL VIEW EXPLODE(originalid) temp AS originalidd WHERE originalidd like "piwik:%") AS d2 ON d1.id = d2.id -WHERE d1.datainfo.deletedbyinference = FALSE; +WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; -- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table. 
-- Creating a temporary dual table that will be removed after the following insert @@ -82,24 +82,25 @@ WHERE yearofvalidation = '-1'; CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) AS id, langs.languages AS language -FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages; +FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages +where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_oids AS SELECT substr(d.id, 4) AS id, oids.ids AS oid -FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids; +FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids +where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r -WHERE r.reltype = 'datasourceOrganization' - and r.datainfo.deletedbyinference = false; +WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; -- datasource sources: -- where the datasource info have been collected from. 
create table if not exists ${stats_db_name}.datasource_sources AS select substr(d.id, 4) as id, substr(cf.key, 4) as datasource from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf -where d.datainfo.deletedbyinference = false; +where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index a1cb46185..c73aa811c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -9,7 +9,7 @@ SELECT substr(o.id, 4) as id, o.legalshortname.value as legalshortname, o.country.classid as country FROM ${openaire_db_name}.organization o -WHERE o.datainfo.deletedbyinference = FALSE; +WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource From c2b44530a36f332e335a6a45190b4290285bdf73 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 3 Feb 2022 13:44:07 +0200 Subject: [PATCH 065/162] typo... 
--- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index 3d7a53a1c..468a42045 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -64,7 +64,7 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false and p.datainfo.invisible=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language From 3c60e53a96b68669e64d09aadc05031408d05cb9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 3 Feb 2022 14:47:08 +0100 Subject: [PATCH 066/162] [stats-wf]fixed the result_result creation for monitor PR#190 on beta --- .../graph/stats/oozie_app/scripts/step13.sql | 14 ++++++------- .../graph/stats/oozie_app/scripts/step14.sql | 10 +++++----- .../graph/stats/oozie_app/scripts/step15.sql | 8 ++++---- .../graph/stats/oozie_app/scripts/step2.sql | 20 +++++++++---------- .../scripts/step20-createMonitorDB.sql | 2 +- .../graph/stats/oozie_app/scripts/step3.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step4.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step5.sql | 20 +++++++++---------- .../graph/stats/oozie_app/scripts/step6.sql | 12 ++++++----- .../graph/stats/oozie_app/scripts/step7.sql | 2 +- .../graph/stats/oozie_app/scripts/step8.sql | 2 +- .../graph/stats/oozie_app/scripts/step9.sql | 2 +- 12 
files changed, 67 insertions(+), 65 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index 315d296fc..947c91072 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -14,7 +14,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -25,7 +25,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -36,7 +36,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource @@ -47,7 +47,7 @@ LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; 
+ WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS SELECT * FROM ${stats_db_name}.publication_sources @@ -76,8 +76,8 @@ join ${openaire_db_name}.result r1 on rel.source=r1.id join ${openaire_db_name}.result r2 on r2.id=rel.target where reltype='resultResult' and r1.resulttype.classname!=r2.resulttype.classname - and r1.datainfo.deletedbyinference=false - and r2.datainfo.deletedbyinference=false + and r1.datainfo.deletedbyinference=false and r1.datainfo.invisible = FALSE + and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE and r1.resulttype.classname != 'other' and r2.resulttype.classname != 'other' - and rel.datainfo.deletedbyinference=false; \ No newline at end of file + and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index 00a6913bc..e3a33a893 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -8,22 +8,22 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS SELECT substr(p.id, 4) as id, 
licenses.value as type from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses @@ -46,7 +46,7 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id; + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; -- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 8e66e05c0..f46b65171 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -9,22 +9,22 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; +where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as select * from ${stats_db_name}.publication_refereed diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index bb0d0ac6c..468a42045 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -38,13 +38,13 @@ SELECT substr(p.id, 4) as id, case when size(p.description) > 0 then true else false end as abstract, 'publication' as type from ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, case @@ -53,45 +53,45 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_datasources as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference = false) p + where 
p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and 
p.datainfo.deletedbyinference = false; \ No newline at end of file + and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index e72378f56..2528e904d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -81,7 +81,7 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source or r.id=orig.target); +create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source) or exists (select 1 from TARGET.result r where r.id=orig.target); compute stats TARGET.result_result; -- datasources diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql index 953eaad6a..76a5e5a48 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -38,20 +38,20 @@ SELECT substr(d.id, 4) AS id, CASE 
WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract, 'dataset' AS type FROM ${openaire_db_name}.dataset d -WHERE d.datainfo.deletedbyinference = FALSE; +WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and d.datainfo.deletedbyinference = false; + and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, case @@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_datasources AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource @@ -68,31 +68,31 @@ FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT 
substr(d.id, 4) id FROM ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql index 0210dc8cb..dc71f41f1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -38,20 +38,20 @@ SELECT substr(s.id, 4) as id, CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract, 'software' as type from ${openaire_db_name}.software s -where s.datainfo.deletedbyinference = false; +where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and s.datainfo.deletedbyinference = false; + and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) as id, case @@ -60,7 +60,7 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource @@ -68,31 +68,31 @@ FROM ( SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) 
instances AS instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN ( SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql index f7b302186..353aa98b7 
100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -37,19 +37,19 @@ SELECT substr(o.id, 4) AS id, CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract, 'other' AS type FROM ${openaire_db_name}.otherresearchproduct o -WHERE o.datainfo.deletedbyinference = FALSE; +WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; -- Otherresearchproduct_citations CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and o.datainfo.deletedbyinference = false; + and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) as id, case @@ -57,33 +57,33 @@ SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and 
p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference = false) p + where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p LEFT OUTER JOIN(SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; + WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid -where p.datainfo.deletedbyinference = false; +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS 
subject -where p.datainfo.deletedbyinference = false; \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 378e0f17b..b5e2eb37b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -5,24 +5,26 @@ ------------------------------------------------------ CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid -FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids; +FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids +where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; + CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype = 'projectOrganization' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.project_results AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; create table ${stats_db_name}.project_classification as select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 from ${openaire_db_name}.project p lateral view 
explode(p.h2020classification) classifs as class -where p.datainfo.deletedbyinference=false and class.h2020programme is not null; +where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; CREATE TABLE ${stats_db_name}.project_tmp ( @@ -72,7 +74,7 @@ SELECT substr(p.id, 4) AS id, p.code.value AS code, p.totalcost AS totalcost FROM ${openaire_db_name}.project p -WHERE p.datainfo.deletedbyinference = false; +WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; create table ${stats_db_name}.funder as select distinct xpath_string(fund, '//funder/id') as id, diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index b3cbc9b41..a57966abf 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -127,7 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultOrganization' - and r.datainfo.deletedbyinference = false; + and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 76d31eb5e..4956daf7f 100644 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -44,7 +44,7 @@ FROM ${openaire_db_name}.datasource d1 LATERAL VIEW EXPLODE(originalid) temp AS originalidd WHERE originalidd like "piwik:%") AS d2 ON d1.id = d2.id -WHERE d1.datainfo.deletedbyinference = FALSE; +WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; -- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table. -- Creating a temporary dual table that will be removed after the following insert diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index a1cb46185..c73aa811c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -9,7 +9,7 @@ SELECT substr(o.id, 4) as id, o.legalshortname.value as legalshortname, o.country.classid as country FROM ${openaire_db_name}.organization o -WHERE o.datainfo.deletedbyinference = FALSE; +WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource From bcfdf9a0d70d3ebe3b42134c18c50b2f58ea4f9a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 3 Feb 2022 16:49:31 +0100 Subject: [PATCH 067/162] iis repository with https --- dhp-workflows/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 53d029467..143178560 100644 --- a/dhp-workflows/pom.xml +++ 
b/dhp-workflows/pom.xml @@ -44,7 +44,7 @@ iis-releases iis releases plugin repository - http://maven.ceon.pl/artifactory/iis-releases + https://maven.ceon.pl/artifactory/iis-releases default From aae667e6b60281adb0932e7c4beec8d10b076a1c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Feb 2022 12:34:25 +0100 Subject: [PATCH 068/162] [APC at the result level] added the APC at the level of the result and modified test class --- .../dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 4 ++++ .../test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java | 6 ++++++ pom.xml | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 526f45f6e..3e74bc5e0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -347,6 +347,10 @@ public abstract class AbstractMdRecordToOafMapper { r.setCoverage(prepareCoverages(doc, info)); r.setContext(prepareContexts(doc, info)); r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES + r + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + r + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); r.setInstance(instances); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 6a5fd33b0..bdb73abf5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -827,6 +827,12 @@ class MappersTest { assertTrue(p.getInstance().size() > 0); assertEquals("https://doi.org/10.1155/2015/439379", p.getInstance().get(0).getUrl().get(0)); + + assertTrue(p.getProcessingchargeamount() != null); + assertTrue(p.getProcessingchargecurrency() != null); + + assertEquals("1721.47", p.getProcessingchargeamount().getValue()); + assertEquals("EUR", p.getProcessingchargecurrency().getValue()); } private void assertValidId(final String id) { diff --git a/pom.xml b/pom.xml index 489f712b0..a99162d26 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.27] + [2.10.28-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From ae633c566b9938729faf0cdd4638e69ac4bc958f Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 4 Feb 2022 15:04:19 +0200 Subject: [PATCH 069/162] fixed the result_result table --- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 2528e904d..4e5f7a373 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -81,7 +81,11 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from 
TARGET.result r where r.id=orig.source) or exists (select 1 from TARGET.result r where r.id=orig.target); +create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); +create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); +create table TARGET.result_result as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +drop view TARGET.foo1; +drop view TARGET.foo2; compute stats TARGET.result_result; -- datasources From 493caef358d37a4f4d32223b0e1923bc7209885a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Feb 2022 14:51:25 +0100 Subject: [PATCH 070/162] [stats-wf]fixed the result_result table related to PR#191 --- .scalafmt.conf | 21 +++++++++++++++++++ dhp-workflows/dhp-aggregation/.scalafmt.conf | 21 +++++++++++++++++++ dhp-workflows/dhp-graph-mapper/.scalafmt.conf | 21 +++++++++++++++++++ .../scripts/step20-createMonitorDB.sql | 6 +++++- 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 .scalafmt.conf create mode 100644 dhp-workflows/dhp-aggregation/.scalafmt.conf create mode 100644 dhp-workflows/dhp-graph-mapper/.scalafmt.conf diff --git a/.scalafmt.conf b/.scalafmt.conf new file mode 100644 index 000000000..0b5dbe0b4 --- /dev/null +++ b/.scalafmt.conf @@ -0,0 +1,21 @@ +style = defaultWithAlign + +align.openParenCallSite = false +align.openParenDefnSite = false +align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}] +continuationIndent.callSite = 2 +continuationIndent.defnSite = 2 +danglingParentheses = true +indentOperator = spray +maxColumn = 120 +newlines.alwaysBeforeTopLevelStatements = true +project.excludeFilters = [".*\\.sbt"] +rewrite.rules = [AvoidInfix] +rewrite.rules = [ExpandImportSelectors] +rewrite.rules = [RedundantBraces] +rewrite.rules = [RedundantParens] +rewrite.rules = [SortImports] +rewrite.rules = [SortModifiers] +rewrite.rules = 
[PreferCurlyFors] +spaces.inImportCurlyBraces = false +unindentTopLevelOperators = true \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/.scalafmt.conf b/dhp-workflows/dhp-aggregation/.scalafmt.conf new file mode 100644 index 000000000..0b5dbe0b4 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/.scalafmt.conf @@ -0,0 +1,21 @@ +style = defaultWithAlign + +align.openParenCallSite = false +align.openParenDefnSite = false +align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}] +continuationIndent.callSite = 2 +continuationIndent.defnSite = 2 +danglingParentheses = true +indentOperator = spray +maxColumn = 120 +newlines.alwaysBeforeTopLevelStatements = true +project.excludeFilters = [".*\\.sbt"] +rewrite.rules = [AvoidInfix] +rewrite.rules = [ExpandImportSelectors] +rewrite.rules = [RedundantBraces] +rewrite.rules = [RedundantParens] +rewrite.rules = [SortImports] +rewrite.rules = [SortModifiers] +rewrite.rules = [PreferCurlyFors] +spaces.inImportCurlyBraces = false +unindentTopLevelOperators = true \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/.scalafmt.conf b/dhp-workflows/dhp-graph-mapper/.scalafmt.conf new file mode 100644 index 000000000..0b5dbe0b4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/.scalafmt.conf @@ -0,0 +1,21 @@ +style = defaultWithAlign + +align.openParenCallSite = false +align.openParenDefnSite = false +align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}] +continuationIndent.callSite = 2 +continuationIndent.defnSite = 2 +danglingParentheses = true +indentOperator = spray +maxColumn = 120 +newlines.alwaysBeforeTopLevelStatements = true +project.excludeFilters = [".*\\.sbt"] +rewrite.rules = [AvoidInfix] +rewrite.rules = [ExpandImportSelectors] +rewrite.rules = [RedundantBraces] +rewrite.rules = [RedundantParens] +rewrite.rules = [SortImports] +rewrite.rules = [SortModifiers] +rewrite.rules = [PreferCurlyFors] +spaces.inImportCurlyBraces = false 
+unindentTopLevelOperators = true \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 2528e904d..4e5f7a373 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -81,7 +81,11 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_result stored as parquet as select * from SOURCE.result_result orig where exists (select 1 from TARGET.result r where r.id=orig.source) or exists (select 1 from TARGET.result r where r.id=orig.target); +create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); +create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); +create table TARGET.result_result as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +drop view TARGET.foo1; +drop view TARGET.foo2; compute stats TARGET.result_result; -- datasources From 9fd2ef468e25cb7140fee450693cce796b254bd4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Feb 2022 16:40:32 +0100 Subject: [PATCH 071/162] [APC at the result level] changed dependecy in external pom --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a99162d26..f3da49580 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - 
[2.10.28-SNAPSHOT] + [2.10.28] [4.0.3] [6.0.5] [3.1.6] From 5f762cbd09163ff6a8ab4ade7300410ba6bb8dee Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Mon, 7 Feb 2022 12:09:12 +0200 Subject: [PATCH 072/162] fixed yet another typo --- .../oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 4e5f7a373..9e4edb44a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -130,7 +130,7 @@ compute stats TARGET.indi_result_has_cc_licence; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence_url; -create view TARGET.indi_funder_country_collab stored as select * from SOURCE.indi_funder_country_collab; +create view TARGET.indi_funder_country_collab stored as parquet as select * from SOURCE.indi_funder_country_collab; create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_with_orcid; From 78be2975f0d324bb4e8947f93f2609d77eb84ec2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 7 Feb 2022 11:22:08 +0100 Subject: [PATCH 073/162] [stats-wf]fixed another typo related to PR#193 --- .../oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 4e5f7a373..9e4edb44a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -130,7 +130,7 @@ compute stats TARGET.indi_result_has_cc_licence; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence_url; -create view TARGET.indi_funder_country_collab stored as select * from SOURCE.indi_funder_country_collab; +create view TARGET.indi_funder_country_collab stored as parquet as select * from SOURCE.indi_funder_country_collab; create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_with_orcid; From fbc28ee8c35a4c30d4cc97d01557ab569cf907f4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 7 Feb 2022 18:32:08 +0100 Subject: [PATCH 074/162] [OpenCitation] change the integration logic to consider dois with commas inside --- .../CreateActionSetSparkJob.java | 33 ++++-- .../actionmanager/opencitations/ReadCOCI.java | 111 ++++++++++++++++++ .../opencitations/model/COCI.java | 89 ++++++++++++++ .../input_readcoci_parameters.json | 36 ++++++ .../opencitations/oozie_app/workflow.xml | 27 +++++ .../CreateOpenCitationsASTest.java | 22 ++-- .../opencitations/ReadCOCITest.java | 94 +++++++++++++++ .../opencitations/COCI/input1/_SUCCESS | 0 
...b-77f8-4059-91c0-5521309823f8-c000.json.gz | Bin 0 -> 346 bytes .../opencitations/COCI/input2/_SUCCESS | 0 ...6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz | Bin 0 -> 306 bytes .../opencitations/COCI/input3/_SUCCESS | 0 ...c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz | Bin 0 -> 316 bytes .../opencitations/COCI/input4/_SUCCESS | 0 ...b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz | Bin 0 -> 137 bytes .../opencitations/inputFiles/input4 | 2 + .../raw/AbstractMdRecordToOafMapper.java | 4 +- dhp-workflows/pom.xml | 2 +- 18 files changed, 394 insertions(+), 26 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/_SUCCESS create mode 100644 
dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4 diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index ea5fea96f..c16f8eeea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -14,6 +14,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; @@ -21,6 +22,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -83,10 +85,16 @@ public class CreateActionSetSparkJob implements Serializable { private static void extractContent(SparkSession spark, String 
inputPath, String outputPath, boolean shouldDuplicateRels) { spark - .sqlContext() - .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) + .read() + .textFile(inputPath + "/*") + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, COCI.class), + Encoders.bean(COCI.class)) +// spark +// .sqlContext() +// .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) .flatMap( - (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), + (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), Encoders.bean(Relation.class)) .filter((FilterFunction) value -> value != null) .toJavaRDD() @@ -98,15 +106,14 @@ public class CreateActionSetSparkJob implements Serializable { } - private static List createRelation(String value, boolean duplicate) { - String[] line = value.split(","); - if (!line[1].startsWith("10.")) { - return new ArrayList<>(); - } + private static List createRelation(COCI value, boolean duplicate) { + List relationList = new ArrayList<>(); - String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1])); - final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2])); + String citing = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting())); + final String cited = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); relationList .addAll( @@ -114,9 +121,11 @@ public class CreateActionSetSparkJob implements Serializable { citing, cited)); - if (duplicate && line[1].endsWith(".refs")) { + if (duplicate && value.getCiting().endsWith(".refs")) { citing = ID_PREFIX + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs")))); + .md5( + CleaningFunctions + .normalizePidValue("doi", value.getCiting().substring(0, 
value.getCiting().indexOf(".refs")))); relationList.addAll(getRelations(citing, cited)); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java new file mode 100644 index 000000000..7ac1f2de3 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -0,0 +1,111 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; +import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class ReadCOCI implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(ReadCOCI.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + ReadCOCI.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + final String outputPath = parser.get("outputPath"); + 
log.info("outputPath: {}", outputPath); + + final String hdfsNameNode = parser.get("nameNode"); + log.info("nameNode: {}", hdfsNameNode); + + final String inputPath = parser.get("sourcePath"); + log.info("input path : {}", inputPath); + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + SparkConf sconf = new SparkConf(); + + final String delimiter = Optional + .ofNullable(parser.get("delimiter")) + .orElse(DEFAULT_DELIMITER); + + runWithSparkSession( + sconf, + isSparkSessionManaged, + spark -> { + doRead( + spark, + fileSystem, + inputPath, + outputPath, + delimiter); + }); + } + + public static void doRead(SparkSession spark, FileSystem fileSystem, String inputPath, String outputPath, + String delimiter) throws IOException { + + RemoteIterator iterator = fileSystem + .listFiles( + new Path(inputPath), true); + + while (iterator.hasNext()) { + LocatedFileStatus fileStatus = iterator.next(); + + Path p = fileStatus.getPath(); + String p_string = p.toString(); + Dataset cociData = spark + .read() + .format("csv") + .option("sep", delimiter) + .option("inferSchema", "true") + .option("header", "true") + .option("quotes", "\"") + .load(p_string); + + cociData.map((MapFunction) row -> { + COCI coci = new COCI(); + coci.setOci(row.getString(0)); + coci.setCiting(row.getString(1)); + coci.setCited(row.getString(2)); + return coci; + }, Encoders.bean(COCI.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + p_string.substring(p_string.lastIndexOf("/") + 1)); + } + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java new file mode 
100644 index 000000000..a7b3330ea --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java @@ -0,0 +1,89 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByPosition; + +public class COCI implements Serializable { + @CsvBindByPosition(position = 0) +// @CsvBindByName(column = "doi") + private String oci; + + @CsvBindByPosition(position = 1) +// @CsvBindByName(column = "level1") + private String citing; + + @CsvBindByPosition(position = 2) +// @CsvBindByName(column = "level2") + private String cited; + + @CsvBindByPosition(position = 3) +// @CsvBindByName(column = "level3") + private String creation; + + @CsvBindByPosition(position = 4) + private String timespan; + + @CsvBindByPosition(position = 5) + private String journal_sc; + + @CsvBindByPosition(position = 6) + private String author_sc; + + public String getOci() { + return oci; + } + + public void setOci(String oci) { + this.oci = oci; + } + + public String getCiting() { + return citing; + } + + public void setCiting(String citing) { + this.citing = citing; + } + + public String getCited() { + return cited; + } + + public void setCited(String cited) { + this.cited = cited; + } + + public String getCreation() { + return creation; + } + + public void setCreation(String creation) { + this.creation = creation; + } + + public String getTimespan() { + return timespan; + } + + public void setTimespan(String timespan) { + this.timespan = timespan; + } + + public String getJournal_sc() { + return journal_sc; + } + + public void setJournal_sc(String journal_sc) { + this.journal_sc = journal_sc; + } + + public String getAuthor_sc() { + return author_sc; + } + + public void setAuthor_sc(String author_sc) { + this.author_sc = author_sc; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json new file mode 100644 index 000000000..14c20f762 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json @@ -0,0 +1,36 @@ +[ + { + "paramName": "sp", + "paramLongName": "sourcePath", + "paramDescription": "the zipped opencitations file", + "paramRequired": true + }, + + { + "paramName": "nn", + "paramLongName": "nameNode", + "paramDescription": "the hdfs name node", + "paramRequired": true + }, + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the hdfs name node", + "paramRequired": false + }, + { + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "the hdfs name node", + "paramRequired": false + }, + { + "paramName": "op", + "paramLongName": "outputPath", + "paramDescription": "the hdfs name node", + "paramRequired": false + } +] + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index d052791a3..7276d2d3e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -26,6 +26,7 @@ ${wf:conf('resumeFrom') eq 'DownloadDump'} ${wf:conf('resumeFrom') eq 'ExtractContent'} + ${wf:conf('resumeFrom') eq 'ReadContent'} @@ -60,6 +61,32 @@ --inputFile${inputFile} --workingPath${workingPath} + + + + + + + yarn + cluster + Produces the AS for OC + eu.dnetlib.dhp.actionmanager.opencitations.ReadCOCI + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} 
+ --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingPath}/COCI + --outputPath${workingDir}/COCI + --nameNode${nameNode} + --delimiter${delimiter} + diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java index 5153c412f..3e4ce750e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -76,7 +76,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -99,7 +99,7 @@ public class CreateOpenCitationsASTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); - assertEquals(60, tmp.count()); + assertEquals(62, tmp.count()); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); @@ -110,7 +110,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -131,7 +131,7 @@ public class CreateOpenCitationsASTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) 
.map(aa -> ((Relation) aa.getPayload())); - assertEquals(44, tmp.count()); + assertEquals(46, tmp.count()); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); @@ -142,7 +142,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -175,7 +175,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -215,7 +215,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -240,8 +240,8 @@ public class CreateOpenCitationsASTest { assertEquals("citation", r.getSubRelType()); assertEquals("resultResult", r.getRelType()); }); - assertEquals(22, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); - assertEquals(22, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); + assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); + assertEquals(23, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); } @@ -250,7 +250,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob @@ -295,7 +295,7 @@ public class CreateOpenCitationsASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + "/eu/dnetlib/dhp/actionmanager/opencitations/COCI") .getPath(); CreateActionSetSparkJob diff --git 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java new file mode 100644 index 000000000..e1b9c4d23 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -0,0 +1,94 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; +import eu.dnetlib.dhp.schema.oaf.Dataset; + +public class ReadCOCITest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(ReadCOCITest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(ReadCOCITest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(ReadCOCITest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + 
conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(ReadCOCITest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testReadCOCI() throws Exception { + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + ReadCOCI + .doRead( + spark, FileSystem.getLocal(new Configuration()), inputPath, + workingDir.toString() + "/COCI", DEFAULT_DELIMITER); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/COCI/*/") + .map(item -> OBJECT_MAPPER.readValue(item, COCI.class)); + + Assertions.assertEquals(23, tmp.count()); + + Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count()); + + Assertions.assertEquals(8, tmp.filter(c -> c.getCiting().indexOf(".refs") > -1).count()); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input1/part-00000-b05c4abb-77f8-4059-91c0-5521309823f8-c000.json.gz new file mode 100644 index 
0000000000000000000000000000000000000000..c55dcd71c6171ba9cf8c61bb0d51852c8c5104e8 GIT binary patch literal 346 zcmV-g0j2&QiwFP!000000L7EdYQr!PgztR}pIWGt{gGsC-^RG1bs;GvITS+Qy|YRP z2}vluxE9E3X=HypjQ8C*?Ut|IOUD!$*Wx2`&K$(JL?Nn?Bw_yQo?SYv-;P?Mez9e$ zIR)YzwA2(_^f^vY5RO8EtJfm6)s-@(qU1Xnccp?gtf+>Az6~vG+PemWp%XGZ6|6~n zV?{-%)1g-Fz904%eO$1mAdJc8XyCp+wMO_@*)=j3SU)Z|!)jXZ;5=(2i&SRfc7vPo zES#ig65%Q;cC?*Io9))-a;t+gF0x?88q$B74hpxrrAJYvo2C=ltN;K2 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input2/part-00000-6831e1e6-f472-40fa-985a-a4f3c74f9b53-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae7886e8c57b098106a3c52959788b361cadc034 GIT binary patch literal 306 zcmV-20nPp&iwFP!000000KJq=PQx$|h4&mIE08dG^J9;P>y!%0f=UDlq>8&U<5Hvo z-NuRHjNcpkiyiwgZFXS%ZH}Z=hfkyk=neFG%cMfUxRMK z)=*>}SV9~A=srf((E7;a+EcBLV`*^<(Ff%U{DaK%^L}$z&1@}FmPXD>EEXA-*Ke*$uJhA& zSqgK}s#apRtuU1}+o1tyq}(|b2FFuF(~*1Jf0w6Hs;+b%oKK|;!aN7Q0BvPIpR5D` E0R117`Tzg` literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz 
b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input3/part-00000-2ec9f31c-5b5c-4c65-92b7-7a6928da5cdb-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..8374019193373d40ded63283b49baf2bedbc8271 GIT binary patch literal 316 zcmV-C0mJ?uiwFP!000000L@cDPQ)+_yz`8nX*#x>xZyj?g319T7SKv7{?0gEAwWgq z)TKSNNyhfrGdXp;`DK6G-E@MKP0N505cBRjPUgq^s!t+P&7rF4d4G2x>S;F< z0%a89Q7fo$OkBrXVO()PUUPLBCF|5jMCO^IFal8)k%Q>&{p$3U7mLd9kc9mDnsH1F z^#qvIblimKf)Fb$sVu~cF(#9SMWL3{&D+-^pSjs;D{qI_Bx<>C%iTmLL*uW+XVjnU z76Vv?)^T$K#54m{F|P9($=c-t#?;1+bNhoa#V9TD|H_!WRcF~3cdi+es5w#sIM1pojL%bM;0 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input4/part-00000-2ba3f17b-f97d-449d-bd08-04a9b935bfd2-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..0436b10ffee3c5aa7b224d814fd1b437aed14221 GIT binary patch literal 137 zcmb2|=3syT(>_N&CPNO_?{bS@OV&=A7PbEJOrKXy;=7FR#lF&Dae7euDSqMC=et_G z1^Sw%TxM2fzRJCMRic#7{#<*FM-x(dj|T1B=DOTO+3#+2c?*Y;l&kK%+H?Odx=M5^ og)Y*lQ<`a|`gxtiR_%m0?d<>T_9e~}<$Zsc{V1()); // NOT PRESENT IN MDSTORES r - .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); r - .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + 
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); r.setInstance(instances); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 53d029467..143178560 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -44,7 +44,7 @@ iis-releases iis releases plugin repository - http://maven.ceon.pl/artifactory/iis-releases + https://maven.ceon.pl/artifactory/iis-releases default From b071f8e4154d6100db1acfeff9f6ba34efabc21d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 8 Feb 2022 15:37:28 +0100 Subject: [PATCH 075/162] [OpenCitation] change to extract in json format each folder just onece --- .../actionmanager/opencitations/ReadCOCI.java | 34 +++++--------- .../input_readcoci_parameters.json | 19 ++++---- .../opencitations/oozie_app/workflow.xml | 8 ++-- .../opencitations/ReadCOCITest.java | 47 +++++++++++++++++-- 4 files changed, 70 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java index 7ac1f2de3..fd83f7072 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -41,18 +41,14 @@ public class ReadCOCI implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String hdfsNameNode = parser.get("nameNode"); - log.info("nameNode: {}", hdfsNameNode); - - final String inputPath = parser.get("sourcePath"); - log.info("input path : {}", inputPath); + final String[] inputFile = parser.get("inputFile").split(";"); + log.info("inputFile {}", inputFile.toString()); Boolean isSparkSessionManaged = 
isSparkSessionManaged(parser); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); - FileSystem fileSystem = FileSystem.get(conf); SparkConf sconf = new SparkConf(); final String delimiter = Optional @@ -65,25 +61,20 @@ public class ReadCOCI implements Serializable { spark -> { doRead( spark, - fileSystem, - inputPath, + workingPath, + inputFile, outputPath, delimiter); }); } - public static void doRead(SparkSession spark, FileSystem fileSystem, String inputPath, String outputPath, + private static void doRead(SparkSession spark, String workingPath, String[] inputFiles, + String outputPath, String delimiter) throws IOException { - RemoteIterator iterator = fileSystem - .listFiles( - new Path(inputPath), true); + for(String inputFile : inputFiles){ + String p_string = workingPath + "/" + inputFile ; - while (iterator.hasNext()) { - LocatedFileStatus fileStatus = iterator.next(); - - Path p = fileStatus.getPath(); - String p_string = p.toString(); Dataset cociData = spark .read() .format("csv") @@ -91,7 +82,8 @@ public class ReadCOCI implements Serializable { .option("inferSchema", "true") .option("header", "true") .option("quotes", "\"") - .load(p_string); + .load(p_string) + .repartition(100); cociData.map((MapFunction) row -> { COCI coci = new COCI(); @@ -103,7 +95,7 @@ public class ReadCOCI implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "/" + p_string.substring(p_string.lastIndexOf("/") + 1)); + .json(outputPath + inputFile); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json index 
14c20f762..b57cb5d9a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json @@ -1,17 +1,12 @@ [ { - "paramName": "sp", - "paramLongName": "sourcePath", + "paramName": "wp", + "paramLongName": "workingPath", "paramDescription": "the zipped opencitations file", "paramRequired": true }, - { - "paramName": "nn", - "paramLongName": "nameNode", - "paramDescription": "the hdfs name node", - "paramRequired": true - }, + { "paramName": "issm", "paramLongName": "isSparkSessionManaged", @@ -28,7 +23,13 @@ "paramName": "op", "paramLongName": "outputPath", "paramDescription": "the hdfs name node", - "paramRequired": false + "paramRequired": true + }, + { + "paramName": "if", + "paramLongName": "inputFile", + "paramDescription": "the hdfs name node", + "paramRequired": true } ] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index 7276d2d3e..aee2559ee 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -82,10 +82,10 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingPath}/COCI - --outputPath${workingDir}/COCI - --nameNode${nameNode} + --workingPath${workingPath}/COCI + --outputPath${workingPath}/COCI_JSON --delimiter${delimiter} + --inputFile${inputFileCoci} @@ -108,7 +108,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - 
--inputPath${workingPath}/COCI + --inputPath${workingPath}/COCI_JSON --outputPath${outputPath} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java index e1b9c4d23..27627f9f6 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -10,6 +10,7 @@ import java.nio.file.Path; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -73,15 +74,53 @@ public class ReadCOCITest { "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") .getPath(); + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3")); + + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4") + .getPath()), + new 
org.apache.hadoop.fs.Path(workingDir + "/COCI/input4")); + ReadCOCI - .doRead( - spark, FileSystem.getLocal(new Configuration()), inputPath, - workingDir.toString() + "/COCI", DEFAULT_DELIMITER); + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-workingPath", + workingDir.toString() + "/COCI", + "-outputPath", + workingDir.toString() + "/COCI_json/", + "-inputFile", "input1;input2;input3;input4" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/COCI/*/") + .textFile(workingDir.toString() + "/COCI_json/*/") .map(item -> OBJECT_MAPPER.readValue(item, COCI.class)); Assertions.assertEquals(23, tmp.count()); From 759ed519f251e976ce34571bd351713bb6c429a8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 8 Feb 2022 16:15:34 +0100 Subject: [PATCH 076/162] [OpenCitation] added logic to avoid the genration of self citations relations --- .../CreateActionSetSparkJob.java | 24 ++++++++++-------- .../opencitations/ReadCOCITest.java | 11 ++++++-- .../opencitations/COCI/input5/_SUCCESS | 0 ...e-90e3-4791-821a-b84636bc13e2-c000.json.gz | Bin 0 -> 20 bytes ...e-90e3-4791-821a-b84636bc13e2-c000.json.gz | Bin 0 -> 108 bytes .../opencitations/inputFiles/input5 | 2 ++ 6 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/_SUCCESS create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 diff 
--git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index c16f8eeea..4051bc6f0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -115,18 +115,20 @@ public class CreateActionSetSparkJob implements Serializable { final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); - relationList - .addAll( - getRelations( - citing, - cited)); + if(!citing.equals(cited)){ + relationList + .addAll( + getRelations( + citing, + cited)); - if (duplicate && value.getCiting().endsWith(".refs")) { - citing = ID_PREFIX + IdentifierFactory - .md5( - CleaningFunctions - .normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); - relationList.addAll(getRelations(citing, cited)); + if (duplicate && value.getCiting().endsWith(".refs")) { + citing = ID_PREFIX + IdentifierFactory + .md5( + CleaningFunctions + .normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); + relationList.addAll(getRelations(citing, cited)); + } } return relationList; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java index 27627f9f6..53af074e1 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -103,6 +103,13 @@ public class ReadCOCITest { 
.getPath()), new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4")); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5")); + ReadCOCI .main( new String[] { @@ -112,7 +119,7 @@ public class ReadCOCITest { workingDir.toString() + "/COCI", "-outputPath", workingDir.toString() + "/COCI_json/", - "-inputFile", "input1;input2;input3;input4" + "-inputFile", "input1;input2;input3;input4;input5" }); @@ -123,7 +130,7 @@ public class ReadCOCITest { .textFile(workingDir.toString() + "/COCI_json/*/") .map(item -> OBJECT_MAPPER.readValue(item, COCI.class)); - Assertions.assertEquals(23, tmp.count()); + Assertions.assertEquals(24, tmp.count()); Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count()); diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00000-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..001322f84b053326cd87d0b2f1df13fddaa4da35 GIT binary patch literal 20 Rcmb2|=3syTW+=_T000et0JZ=C literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz 
b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/COCI/input5/part-00061-d6d1dc6e-90e3-4791-821a-b84636bc13e2-c000.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..12968af39e578a2c9afae321fee5a918e8ad8ee1 GIT binary patch literal 108 zcmb2|=3sz;si$`HHW=`*T$pDVBW!W}XT%y|0WW4&5s!|$hB^%e#n Date: Tue, 8 Feb 2022 16:23:05 +0100 Subject: [PATCH 077/162] [OpenCitation] refactoring --- .../actionmanager/opencitations/CreateActionSetSparkJob.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index 4051bc6f0..f230a7fd7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -90,9 +90,6 @@ public class CreateActionSetSparkJob implements Serializable { .map( (MapFunction) value -> OBJECT_MAPPER.readValue(value, COCI.class), Encoders.bean(COCI.class)) -// spark -// .sqlContext() -// .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) .flatMap( (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), Encoders.bean(Relation.class)) From 600ede1798beb34c282dba65593e229385d908a1 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 11 Feb 2022 11:00:20 +0100 Subject: [PATCH 078/162] serialisation of APCs int he XML records --- .../raw/AbstractMdRecordToOafMapper.java | 4 +-- .../oa/provision/utils/XmlRecordFactory.java | 10 ++++++ .../oa/provision/XmlRecordFactoryTest.java | 3 ++ .../dnetlib/dhp/oa/provision/publication.json | 34 ++++++++++++++++++- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 3e74bc5e0..3e8ca1763 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -348,9 +348,9 @@ public abstract class AbstractMdRecordToOafMapper { r.setContext(prepareContexts(doc, info)); r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES r - .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); + .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); r - .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); r.setInstance(instances); r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances)); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 9a3f2188b..e51a84e02 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -398,6 +398,16 @@ public class XmlRecordFactory implements Serializable { if (r.getResourcetype() != null) { metadata.add(XmlSerializationUtils.mapQualifier("resourcetype", r.getResourcetype())); } + if (r.getProcessingchargeamount() != null) { + metadata + .add( + XmlSerializationUtils + .asXmlElement("processingchargeamount", r.getProcessingchargeamount().getValue())); + 
metadata + .add( + XmlSerializationUtils + .asXmlElement("processingchargecurrency", r.getProcessingchargecurrency().getValue())); + } } switch (type) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index a4b6182bc..c1e0567e6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -66,6 +66,9 @@ public class XmlRecordFactoryTest { assertEquals("10.5689/LIB.2018.2853550", doc.valueOf("//instance/alternateidentifier/text()")); assertEquals(3, doc.selectNodes("//instance").size()); + + assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()")); + assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()")); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index d5aa13ed6..3b5b54bbf 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -1655,5 +1655,37 @@ }, "value": "Understanding Electromigration in Cu-CNT Composite Interconnects A Multiscale Electrothermal Simulation Study" } - ] + ], + "processingchargeamount": { + "value": "1721.47", + "dataInfo": { + "invisible": true, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + 
} + }, + "processingchargecurrency": { + "value": "EUR", + "dataInfo": { + "invisible": true, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } } \ No newline at end of file From 615817033410a288709c47a310c35572e5b59d05 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 11 Feb 2022 18:05:18 +0100 Subject: [PATCH 079/162] testing delegated authority and bumped dep to schemas --- .../schema/oaf/utils/OafMapperUtilsTest.java | 16 ++ .../dhp/schema/oaf/utils/dataset_2.json | 141 +++++++++++++++++- .../schema/oaf/utils/dataset_delegated.json | 140 +++++++++++++++++ pom.xml | 2 +- 4 files changed, 297 insertions(+), 2 deletions(-) create mode 100644 dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 8804469fa..79629a171 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -185,6 +185,22 @@ class OafMapperUtilsTest { .getClassid()); } + @Test + void testDelegatedAuthority() throws IOException { + Dataset d1 = read("dataset_2.json", Dataset.class); + Dataset d2 = read("dataset_delegated.json", Dataset.class); + + assertEquals(1, d2.getCollectedfrom().size()); + assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID)); + + Result res = OafMapperUtils.mergeResults(d1, d2); + + assertEquals(d2, res); + + System.out.println(OBJECT_MAPPER.writeValueAsString(res)); + + } + protected HashSet cfId(List collectedfrom) { return 
collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new)); } diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json index 52e4e126a..c880edb7d 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json @@ -1 +1,140 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]} \ No newline at end of file +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", + "resuttype": {"classid": "dataset"}, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2011.03.013" + }, + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ], + "collectedfrom": [ + { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", + "value": "Repository B" + } + ], + "instance": [ + { + "refereed": { + "classid": "0000", + "classname": "UNKNOWN", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, + "hostedby": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + 
"processingchargecurrency": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "EUR" + }, + "pid": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "Digital Object Identifier", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1371/journal.pone.0085605" + } + ], + "distributionlocation": "", + "url": ["https://doi.org/10.1371/journal.pone.0085605"], + "alternateIdentifier": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmid", + "classname": "PubMed ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "24454899.0" + } + ], + "collectedfrom": { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", + "value": "Repository B" + }, + "processingchargeamount": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + 
"trust": "0.9" + }, + "value": "1022.02" + }, + "instancetype": { + "classid": "0004", + "classname": "Conference object", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + } + } + ] +} \ No newline at end of file diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json new file mode 100644 index 000000000..967c1181b --- /dev/null +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json @@ -0,0 +1,140 @@ +{ + "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", + "resuttype": {"classid": "dataset"}, + "pid": [ + { + "qualifier": {"classid": "doi"}, + "value": "10.1016/j.cmet.2011.03.013" + }, + { + "qualifier": {"classid": "urn"}, + "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2" + }, + { + "qualifier": {"classid": "scp-number"}, + "value": "79953761260" + }, + { + "qualifier": {"classid": "pmc"}, + "value": "21459329" + } + ], + "collectedfrom": [ + { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + } + ], + "instance": [ + { + "refereed": { + "classid": "0000", + "classname": "UNKNOWN", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, + "hostedby": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "processingchargecurrency": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "EUR" + }, + "pid": [ + { + 
"dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "doi", + "classname": "Digital Object Identifier", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1371/journal.pone.0085605" + } + ], + "distributionlocation": "", + "url": ["https://doi.org/10.1371/journal.pone.0085605"], + "alternateIdentifier": [ + { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "qualifier": { + "classid": "pmid", + "classname": "PubMed ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "24454899.0" + } + ], + "collectedfrom": { + "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", + "value": "Zenodo" + }, + "processingchargeamount": { + "dataInfo": { + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "Harvested", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "deletedbyinference": false, + "inferred": false, + "inferenceprovenance": "", + "invisible": true, + "trust": "0.9" + }, + "value": "1022.02" + }, + "instancetype": { + "classid": "0004", + "classname": "Conference object", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + } + } + ] +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index f3da49580..603a4cf1e 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.28] + 
[2.10.29] [4.0.3] [6.0.5] [3.1.6] From 1490867cc7886746b5b9925b9bd5a3ebd7499cb5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 14 Feb 2022 14:52:12 +0100 Subject: [PATCH 080/162] [OpenCitation] cleaning of the COCI model --- .../opencitations/model/COCI.java | 50 +------------------ 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java index a7b3330ea..bad4a5a3b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java @@ -6,30 +6,12 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByPosition; public class COCI implements Serializable { - @CsvBindByPosition(position = 0) -// @CsvBindByName(column = "doi") private String oci; - @CsvBindByPosition(position = 1) -// @CsvBindByName(column = "level1") private String citing; - @CsvBindByPosition(position = 2) -// @CsvBindByName(column = "level2") private String cited; - @CsvBindByPosition(position = 3) -// @CsvBindByName(column = "level3") - private String creation; - - @CsvBindByPosition(position = 4) - private String timespan; - - @CsvBindByPosition(position = 5) - private String journal_sc; - - @CsvBindByPosition(position = 6) - private String author_sc; public String getOci() { return oci; @@ -55,35 +37,5 @@ public class COCI implements Serializable { this.cited = cited; } - public String getCreation() { - return creation; - } - - public void setCreation(String creation) { - this.creation = creation; - } - - public String getTimespan() { - return timespan; - } - - public void setTimespan(String timespan) { - this.timespan = timespan; - } - - public String getJournal_sc() { - return journal_sc; - } - - public void 
setJournal_sc(String journal_sc) { - this.journal_sc = journal_sc; - } - - public String getAuthor_sc() { - return author_sc; - } - - public void setAuthor_sc(String author_sc) { - this.author_sc = author_sc; - } + } From be64055cfe212bcfc945da6fa53c76e15f977b8b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 14 Feb 2022 15:49:44 +0100 Subject: [PATCH 081/162] [OpenCitation] changed the name of destination folders --- .../actionmanager/opencitations/ReadCOCI.java | 2 +- .../opencitations/model/COCI.java | 2 +- .../opencitations/ReadCOCITest.java | 20 +++++++++--------- .../opencitations/inputFiles/input1 | 8 ------- .../opencitations/inputFiles/input1.gz | Bin 0 -> 422 bytes .../opencitations/inputFiles/input2 | 8 ------- .../opencitations/inputFiles/input2.gz | Bin 0 -> 378 bytes .../opencitations/inputFiles/input3 | 9 -------- .../opencitations/inputFiles/input3.gz | Bin 0 -> 395 bytes .../opencitations/inputFiles/input4 | 2 -- .../opencitations/inputFiles/input4.gz | Bin 0 -> 179 bytes .../opencitations/inputFiles/input5 | 2 -- .../opencitations/inputFiles/input5.gz | Bin 0 -> 150 bytes 13 files changed, 12 insertions(+), 41 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz delete mode 100644 
dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java index fd83f7072..3fd6e8e9a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -73,7 +73,7 @@ public class ReadCOCI implements Serializable { String delimiter) throws IOException { for(String inputFile : inputFiles){ - String p_string = workingPath + "/" + inputFile ; + String p_string = workingPath + "/" + inputFile + ".gz"; Dataset cociData = spark .read() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java index bad4a5a3b..ce087ce22 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java @@ -37,5 +37,5 @@ public class COCI implements Serializable { this.cited = cited; } - + } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java index 53af074e1..629e95c8e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -78,37 +78,37 @@ public class ReadCOCITest { fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz")); fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz")); fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz")); fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz")); fs .copyFromLocalFile( false, new org.apache.hadoop.fs.Path(getClass() - 
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5") + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz") .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5")); + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz")); ReadCOCI .main( diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 deleted file mode 100644 index d93d6fd99..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 +++ /dev/null @@ -1,8 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -02001000007362801000805046300010563030608046333-0200101010136193701050501630209010637020000083700020400083733,10.1007/s10854-015-3684-x,10.1111/j.1551-2916.2008.02408.x,2015-09-01,P7Y2M,no,no -02001000007362801000805046300010563030608046333-02001000007362801000805046300010463020101046309,10.1007/s10854-015-3684-x,10.1007/s10854-014-2114-9,2015-09-01,P1Y2M4D,yes,no -02001000007362801000805046300010563030608046333-020010001063619371214271022182329370200010337000937000609,10.1007/s10854-015-3684-x,10.1016/j.ceramint.2013.09.069,2015-09-01,P1Y6M,no,no -02001000007362801000805046300010563030608046333-02001000007362801000805046300000963090901036304,10.1007/s10854-015-3684-x,10.1007/s10854-009-9913-4,2015-09-01,P6Y3M10D,yes,no -02001000007362801000805046300010563030608046333-02001000106360000030863010009085807025909000307006305,10.1007/s10854-015-3684-x,10.1016/0038-1098(72)90370-5,2015-09-01,P43Y8M,no,no -02001000007362801000805046300010563030608056309-02001000106361937281010370200010437000937000308,10.1007/s10854-015-3685-9,10.1016/j.saa.2014.09.038,2015-09-03,P0Y7M,no,no 
-02001000007362801000805046300010563030608056309-0200100010636193722102912171027370200010537000437000106,10.1007/s10854-015-3685-9,10.1016/j.matchar.2015.04.016,2015-09-03,P0Y2M,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a734f4e201d4520ede681fcda66aa75e2fb8706 GIT binary patch literal 422 zcmV;X0a^YZiwFqDY6@Zi18Ht>b#yTRwUk>=tuPRU-~A<;v^bZxwC}!}_`07=!y$?} z8w{`${r9eRtH-S|UQbKPFf+i{#R^k5m~L2y@v5BqKc<`eybjZ7)?wQ(w|O)-({3K~ z-^-;ldAD9qGom9BBBnB04H9b{$&(98rQ&f`3dkX&YWMwhYT%@8>A__52_;+w zxZb80o>+1&c5u}W8vn7Xxu8w^d#C~vC)l}|xqU|V6?PAf-cKQK^V(hKxnTQ-K+fS6 Q_S4_u0fkiL0VM?h0048#m;e9( literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 deleted file mode 100644 index 14ee8b354..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 +++ /dev/null @@ -1,8 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -02001000308362804010509076300010963000003086301-0200100020936020001003227000009010004,10.1038/s41597-019-0038-1,10.1029/2010wr009104,2019-04-15,P8Y1M,no,no -02001000308362804010509076300010963000003086301-0200100010636280103060463080105025800015900000006006303,10.1038/s41597-019-0038-1,10.1016/s1364-8152(01)00060-3,2019-04-15,P17Y3M,no,no -02001000308362804010509076300010963000003086301-02001000007362800000407076300010063000401066333,10.1038/s41597-019-0038-1,10.1007/s00477-010-0416-x,2019-04-15,P8Y9M6D,no,no 
-02001000308362804010509076300010963000003086301-02001000007362800000700046300010363000905016308,10.1038/s41597-019-0038-1,10.1007/s00704-013-0951-8,2019-04-15,P5Y9M23D,no,no -02001000308362804010509076300010963000003086301-02001000002361924123705070707,10.1038/s41597-019-0038-1,10.1002/joc.5777,2019-04-15,P0Y8M1D,no,no -02001000308362804010509076300010963000003086301-02005010904361714282863020263040504076302000108,10.1038/s41597-019-0038-1,10.5194/hess-22-4547-2018,2019-04-15,P0Y7M18D,no,no -02001000308362804010509076300010963000003086301-02001000002361924123703050404,10.1038/s41597-019-0038-1,10.1002/joc.3544,2019-04-15,P6Y9M6D,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz new file mode 100644 index 0000000000000000000000000000000000000000..b30aa073cc3b8c0515bd3cf766e184a805a7705b GIT binary patch literal 378 zcmV-=0fqh_iwFqDY6@Zi18Ht>b#yWSt&>ep!!Qhn@B9*wn$=&4lRMW+Tz5iKnOG6D zsO+Ms?Enb@a%MhqkNQZtnW$9JWtYJ8m~~H}uuqy=|xWO<%nX`?24= zK2B}5+0VOSwABp|00IC*LISNsrtd@zu6E%@f!jJ5YuAGBpPgy7Vxg5IzGWhUGt^f09yQc2b#yZTy;DtZgD?!e@3(~D4~WTq*Gju`nh@(YrVS>b zQ-A*Kw5mogMQt^LlrKN$`Pm-Y&b8g#^=~$9UtBwGo4Ff$H+Ns#>AUIO$FLjw=JR=K zU9+3thf!A#KmZ8#5eJAMQ0d-A1lFCN+h8HV0!ykgLzH?)5EKDWBs7|}L-6X1@+cJ| zMiw9oK}z=|mWn2kvCDJ*Gb z^T)z`fwCT9{^3!nXR(C6S2yAnkJ0a!F;HFIDF#NW7*ZUh_9etMEPtk*N8&OrXcaZW zhT`K*%>Jg^G&QRT#j1331d7E{Dyi`{sh0IeiI&Q>uKrY0!6hNCb#ycUHBCtl!Y~ZH^NgNd7-uDqK;p^?RcQsOAVF>V ze;ifyZjZ+=JN@6AjSRG0rRpFDqNEyfc1|qbAdXJ8BqM hFgaomx^qF3d{I5iJElAP2j3_cu|J_U8=~j{004P1OThpD literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 deleted file mode 100644 index 
0a1cd5aca..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5 +++ /dev/null @@ -1,2 +0,0 @@ -oci,citing,cited,creation,timespan,journal_sc,author_sc -02001000007362801000805046300010563030608046333-02001000007362801000805046300010563030608046333,10.1007/s10854-015-3684-x,10.1007/s10854-015-3684-x,2015-09-01,P7Y2M,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz new file mode 100644 index 0000000000000000000000000000000000000000..2e7a6c85b1bd6f4478ad720806217cd4aa2b8779 GIT binary patch literal 150 zcmV;H0BQdpiwFqDY6@Zi18Ht>b#yfVm5NIWLNO2o_nu{k`2l0cnMH!HD;dMoq?^6&?La9jG?`4-Q%=bK(F1 E0PH42g8%>k literal 0 HcmV?d00001 From 3aa2020b24abd65b2bcdc9bb6c96d2babb359c1e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 15 Feb 2022 11:05:27 +0100 Subject: [PATCH 082/162] added script to regenerate hostedBy Map following instruction defined on ticket #7539 updated hosted By Map --- .../dhp/datacite/create_updated_hb_map.py | 63 + .../eu/dnetlib/dhp/datacite/hostedBy_map.json | 1065 ++++++++++++++++- 2 files changed, 1103 insertions(+), 25 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py new file mode 100644 index 000000000..db0431aae --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/create_updated_hb_map.py @@ -0,0 +1,63 @@ +from urllib.request import urlopen +import json + + +def retrieve_datacite_clients(base_url): + datacite_clients = {} + 
while base_url is not None: + with urlopen(base_url) as response: + print(f"requesting {base_url}") + response_content = response.read() + data = json.loads(response_content) + if 'data' in data and len(data['data'])>0: + for item in data['data']: + datacite_clients[item['id'].lower()]= item['attributes']['re3data'].lower().replace("https://doi.org/","") + base_url = data['links']['next'] + else: + base_url = None + return datacite_clients + + +def retrieve_r3data(start_url): + r3data_clients = {} + page_number = 1 + base_url = start_url + while base_url is not None: + with urlopen(base_url) as response: + print(f"requesting {base_url}") + response_content = response.read() + data = json.loads(response_content) + if 'data' in data and len(data['data'])>0: + for item in data['data']: + r3data_clients[item['id'].lower()]= dict( + openaire_id= "re3data_____::"+item['attributes']['re3dataId'].lower(), + official_name=item['attributes']['repositoryName'] + ) + page_number +=1 + base_url = f"{start_url}&page[number]={page_number}" + else: + base_url = None + return r3data_clients + + + + + + +base_url ="https://api.datacite.org/clients?query=re3data_id:*&page[size]=250" + +dc = retrieve_datacite_clients(base_url) +r3 = retrieve_r3data("https://api.datacite.org/re3data?page[size]=250") + +result = {} + +for item in dc: + res = dc[item].lower() + if res not in r3: + print(f"missing {res} for {item} in dictionary") + else: + result[item.upper()]= dict(openaire_id=r3[res]["openaire_id"],datacite_name=r3[res]["official_name"], official_name=r3[res]["official_name"] ) + + +with open('hostedBy_map.json', 'w', encoding='utf8') as json_file: + json.dump(result, json_file, ensure_ascii=False, indent=1) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json index 5aa50d6a8..947a9a255 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json @@ -1,27 +1,1042 @@ { - "CERN.ZENODO": { - "openaire_id": "opendoar____::2659", - "datacite_name": "Zenodo", - "official_name": "ZENODO" - }, - "TIB.PANGAEA": { - "openaire_id": "re3data_____::r3d100010134", - "datacite_name": "PANGAEA", - "official_name": "PANGAEA" - }, - "DRYAD.DRYAD": { - "openaire_id": "re3data_____::r3d100000044", - "datacite_name": "DRYAD", - "official_name": "DRYAD" - }, - "FIGSHARE.UCT": { - "openaire_id": "re3data_____::r3d100012633", - "datacite_name": "University of Cape Town (UCT)", - "official_name": "ZivaHub" - }, - "CSIC.DIGITAL": { - "openaire_id": "re3data_____::r3d100011076", - "datacite_name": "Digital CSIC", - "official_name": "DIGITAL.CSIC" - } + "GESIS.RKI": { + "openaire_id": "re3data_____::r3d100010436", + "datacite_name": "Forschungsdatenzentrum am Robert Koch Institut", + "official_name": "Forschungsdatenzentrum am Robert Koch Institut" + }, + "DELFT.DATA4TU": { + "openaire_id": "re3data_____::r3d100010216", + "datacite_name": "4TU.ResearchData | science.engineering.design", + "official_name": "4TU.ResearchData | science.engineering.design" + }, + "FBTK.DMXFNR": { + "openaire_id": "re3data_____::r3d100013444", + "datacite_name": "Aperta TÜBİTAK Open Archive", + "official_name": "Aperta TÜBİTAK Open Archive" + }, + "BL.CAM": { + "openaire_id": "re3data_____::r3d100010620", + "datacite_name": "Apollo", + "official_name": "Apollo" + }, + "NU.ARCH": { + "openaire_id": "re3data_____::r3d100012925", + "datacite_name": "Arch", + "official_name": "Arch" + }, + "BL.ADS": { + "openaire_id": "re3data_____::r3d100000006", + "datacite_name": "Archaeology Data Service", + "official_name": "Archaeology Data Service" + }, + "FZJ.B2SHARE": { + "openaire_id": "re3data_____::r3d100011394", + "datacite_name": "B2SHARE", + "official_name": "B2SHARE" + }, + 
"STSCI.MAST": { + "openaire_id": "re3data_____::r3d100010403", + "datacite_name": "Barbara A. Mikulski Archive for Space Telescopes", + "official_name": "Barbara A. Mikulski Archive for Space Telescopes" + }, + "CBG.DATASETS": { + "openaire_id": "re3data_____::r3d100010927", + "datacite_name": "Barcode of Life Data Systems", + "official_name": "Barcode of Life Data Systems" + }, + "TIB.BEILST": { + "openaire_id": "re3data_____::r3d100012329", + "datacite_name": "STRENDA DB", + "official_name": "STRENDA DB" + }, + "MLBS.SKUXGS": { + "openaire_id": "re3data_____::r3d100011696", + "datacite_name": "biodiversity.aq", + "official_name": "biodiversity.aq" + }, + "BL.BIRKBECK": { + "openaire_id": "re3data_____::r3d100012185", + "datacite_name": "Birkbeck Research Data", + "official_name": "Birkbeck Research Data" + }, + "SND.BOLIN": { + "openaire_id": "re3data_____::r3d100011699", + "datacite_name": "Bolin Centre Database", + "official_name": "Bolin Centre Database" + }, + "BROWN.BDR": { + "openaire_id": "re3data_____::r3d100011654", + "datacite_name": "Brown Digital Repository", + "official_name": "Brown Digital Repository" + }, + "BL.BRUNEL": { + "openaire_id": "re3data_____::r3d100012140", + "datacite_name": "Brunel figshare", + "official_name": "Brunel figshare" + }, + "TIB.BAFG": { + "openaire_id": "re3data_____::r3d100011664", + "datacite_name": "Geoportal der BFG", + "official_name": "Geoportal der BFG" + }, + "TIND.CALTECH": { + "openaire_id": "re3data_____::r3d100012384", + "datacite_name": "CaltechDATA", + "official_name": "CaltechDATA" + }, + "CUL.CIESIN": { + "openaire_id": "re3data_____::r3d100010207", + "datacite_name": "Center for International Earth Science Information Network", + "official_name": "Center for International Earth Science Information Network" + }, + "TIB.KIT-IOC": { + "openaire_id": "re3data_____::r3d100010748", + "datacite_name": "chemotion", + "official_name": "chemotion" + }, + "CORNELL.CISER": { + "openaire_id": 
"re3data_____::r3d100011056", + "datacite_name": "CISER Data & Reproduction Archive", + "official_name": "CISER Data & Reproduction Archive" + }, + "CLARIN.CLARIN": { + "openaire_id": "re3data_____::r3d100010209", + "datacite_name": "CLARIN-ERIC", + "official_name": "CLARIN-ERIC" + }, + "OCEAN.OCEAN": { + "openaire_id": "re3data_____::r3d100012369", + "datacite_name": "Code Ocean", + "official_name": "Code Ocean" + }, + "CORNELL.LIBRARY": { + "openaire_id": "re3data_____::r3d100012322", + "datacite_name": "eCommons - Cornell's digital repository", + "official_name": "eCommons - Cornell's digital repository" + }, + "BL.CRAN": { + "openaire_id": "re3data_____::r3d100012068", + "datacite_name": "Cranfield Online Research Data", + "official_name": "Cranfield Online Research Data" + }, + "DARTLIB.CRAWDAD": { + "openaire_id": "re3data_____::r3d100010716", + "datacite_name": "CRAWDAD", + "official_name": "CRAWDAD" + }, + "GESIS.CSDA": { + "openaire_id": "re3data_____::r3d100010484", + "datacite_name": "Czech Social Science Data Archive", + "official_name": "Czech Social Science Data Archive" + }, + "PSU.DATACOM": { + "openaire_id": "re3data_____::r3d100012927", + "datacite_name": "Data Commons", + "official_name": "Data Commons" + }, + "INIST.INRA": { + "openaire_id": "re3data_____::r3d100012673", + "datacite_name": "Data INRAE", + "official_name": "Data INRAE" + }, + "UMN.DRUM": { + "openaire_id": "re3data_____::r3d100011393", + "datacite_name": "Data Repository for the University of Minnesota", + "official_name": "Data Repository for the University of Minnesota" + }, + "ESTDOI.REPO": { + "openaire_id": "re3data_____::r3d100012333", + "datacite_name": "DataDOI", + "official_name": "DataDOI" + }, + "DAFI.CLIENT": { + "openaire_id": "re3data_____::r3d100010217", + "datacite_name": "DataFirst", + "official_name": "DataFirst" + }, + "UNM.DATAONE": { + "openaire_id": "re3data_____::r3d100000045", + "datacite_name": "DataONE", + "official_name": "DataONE" + }, + "FCT.UMINHO": 
{ + "openaire_id": "re3data_____::r3d100013173", + "datacite_name": "DataRepositoriUM", + "official_name": "DataRepositoriUM" + }, + "FIGSHARE.IASTATE": { + "openaire_id": "re3data_____::r3d100012696", + "datacite_name": "DataShare: the Open Data Repository of Iowa State University", + "official_name": "DataShare: the Open Data Repository of Iowa State University" + }, + "PU.DATASPACE": { + "openaire_id": "re3data_____::r3d100012513", + "datacite_name": "DataSpace", + "official_name": "DataSpace" + }, + "DANS.DATAVERSENL": { + "openaire_id": "re3data_____::r3d100011201", + "datacite_name": "DataverseNL", + "official_name": "DataverseNL" + }, + "BIBSYS.UIT-ORD": { + "openaire_id": "re3data_____::r3d100012538", + "datacite_name": "DataverseNO", + "official_name": "DataverseNO" + }, + "GESIS.SSRI": { + "openaire_id": "re3data_____::r3d100013494", + "datacite_name": "DATICE", + "official_name": "DATICE" + }, + "SML.TDAR": { + "openaire_id": "re3data_____::r3d100010347", + "datacite_name": "tDAR", + "official_name": "tDAR" + }, + "CSIC.DIGITAL": { + "openaire_id": "re3data_____::r3d100011076", + "datacite_name": "DIGITAL.CSIC", + "official_name": "DIGITAL.CSIC" + }, + "BL.DRI": { + "openaire_id": "re3data_____::r3d100011805", + "datacite_name": "Digital Repository of Ireland", + "official_name": "Digital Repository of Ireland" + }, + "SUBGOE.DARIAH": { + "openaire_id": "re3data_____::r3d100011345", + "datacite_name": "DARIAH-DE Repository", + "official_name": "DARIAH-DE Repository" + }, + "DRYAD.DRYAD": { + "openaire_id": "re3data_____::r3d100000044", + "datacite_name": "DRYAD", + "official_name": "DRYAD" + }, + "ZBMED.DSMZ": { + "openaire_id": "re3data_____::r3d100010219", + "datacite_name": "DSMZ", + "official_name": "DSMZ" + }, + "DKRZ.ESGF": { + "openaire_id": "re3data_____::r3d100011159", + "datacite_name": "Earth System Grid Federation", + "official_name": "Earth System Grid Federation" + }, + "KTSW.AEZVVV": { + "openaire_id": "re3data_____::r3d100013469", + 
"datacite_name": "EarthEnv", + "official_name": "EarthEnv" + }, + "DANS.ARCHIVE": { + "openaire_id": "re3data_____::r3d100010214", + "datacite_name": "EASY", + "official_name": "EASY" + }, + "ETHZ.WSL": { + "openaire_id": "re3data_____::r3d100012587", + "datacite_name": "EnviDat", + "official_name": "EnviDat" + }, + "ETHZ.E-COLL": { + "openaire_id": "re3data_____::r3d100012557", + "datacite_name": "ETH Zürich Research Collection", + "official_name": "ETH Zürich Research Collection" + }, + "ETHZ.DA-RD": { + "openaire_id": "re3data_____::r3d100011626", + "datacite_name": "ETH Data Archive", + "official_name": "ETH Data Archive" + }, + "BL.ECMWF": { + "openaire_id": "re3data_____::r3d100011726", + "datacite_name": "European Centre for Medium-Range Weather Forecasts", + "official_name": "European Centre for Medium-Range Weather Forecasts" + }, + "CARL.FRDR": { + "openaire_id": "re3data_____::r3d100012646", + "datacite_name": "Federated Research Data Repository", + "official_name": "Federated Research Data Repository" + }, + "FIGSHARE.ARS": { + "openaire_id": "re3data_____::r3d100010066", + "datacite_name": "figshare", + "official_name": "figshare" + }, + "TIB.FLOSS": { + "openaire_id": "re3data_____::r3d100010863", + "datacite_name": "FLOSSmole", + "official_name": "FLOSSmole" + }, + "LXKC.DSKYFI": { + "openaire_id": "re3data_____::r3d100010976", + "datacite_name": "ForestPlots.net", + "official_name": "ForestPlots.net" + }, + "YKDK.ZUYSQI": { + "openaire_id": "re3data_____::r3d100010368", + "datacite_name": "FORS DARIS", + "official_name": "FORS DARIS" + }, + "TIB.LUIS": { + "openaire_id": "re3data_____::r3d100012825", + "datacite_name": "Forschungsdaten-Repositorium der LUH", + "official_name": "Forschungsdaten-Repositorium der LUH" + }, + "GESIS.BIBB-FDZ": { + "openaire_id": "re3data_____::r3d100010190", + "datacite_name": "Forschungsdatenzentrum im Bundesinstitut für Berufsbildung", + "official_name": "Forschungsdatenzentrum im Bundesinstitut für Berufsbildung" + 
}, + "GESIS.ZPID": { + "openaire_id": "re3data_____::r3d100010328", + "datacite_name": "PsychData", + "official_name": "PsychData" + }, + "TIB.GFZ": { + "openaire_id": "re3data_____::r3d100012335", + "datacite_name": "GFZ Data Services", + "official_name": "GFZ Data Services" + }, + "CNGB.GIGADB": { + "openaire_id": "re3data_____::r3d100010478", + "datacite_name": "GigaDB", + "official_name": "GigaDB" + }, + "GBIF.GBIF": { + "openaire_id": "re3data_____::r3d100000039", + "datacite_name": "Global Biodiversity Information Facility", + "official_name": "Global Biodiversity Information Facility" + }, + "ARDCX.GRIFFITH": { + "openaire_id": "re3data_____::r3d100010864", + "datacite_name": "Griffith University Research Data Repository", + "official_name": "Griffith University Research Data Repository" + }, + "GDCC.HARVARD-SBGR": { + "openaire_id": "re3data_____::r3d100011601", + "datacite_name": "Structural Biology Data Grid", + "official_name": "Structural Biology Data Grid" + }, + "GDCC.HARVARD-DV": { + "openaire_id": "re3data_____::r3d100010051", + "datacite_name": "Harvard Dataverse", + "official_name": "Harvard Dataverse" + }, + "CERN.HEPDATA": { + "openaire_id": "re3data_____::r3d100010081", + "datacite_name": "HEPData", + "official_name": "HEPData" + }, + "SND.ICOS": { + "openaire_id": "re3data_____::r3d100012203", + "datacite_name": "ICOS Carbon Portal", + "official_name": "ICOS Carbon Portal" + }, + "GESIS.ICPSR": { + "openaire_id": "re3data_____::r3d100010255", + "datacite_name": "Inter-university Consortium for Political and Social Research", + "official_name": "Inter-university Consortium for Political and Social Research" + }, + "IEEE.DATAPORT": { + "openaire_id": "re3data_____::r3d100012569", + "datacite_name": "IEEE DataPort", + "official_name": "IEEE DataPort" + }, + "IIASA.DARE": { + "openaire_id": "re3data_____::r3d100012932", + "datacite_name": "IIASA DARE", + "official_name": "IIASA DARE" + }, + "ILLINOIS.DATABANK": { + "openaire_id": 
"re3data_____::r3d100012001", + "datacite_name": "Illinois Data Bank", + "official_name": "Illinois Data Bank" + }, + "IRIS.IRIS": { + "openaire_id": "re3data_____::r3d100010268", + "datacite_name": "Incorporated Research Institutions for Seismology", + "official_name": "Incorporated Research Institutions for Seismology" + }, + "GESIS.INDEPTH": { + "openaire_id": "re3data_____::r3d100011392", + "datacite_name": "INDEPTH Data Repository", + "official_name": "INDEPTH Data Repository" + }, + "JCVI.GXPWAQ": { + "openaire_id": "re3data_____::r3d100011558", + "datacite_name": "Influenza Research Database", + "official_name": "Influenza Research Database" + }, + "TIB.INP": { + "openaire_id": "re3data_____::r3d100013120", + "datacite_name": "INPTDAT", + "official_name": "INPTDAT" + }, + "CERN.INSPIRE": { + "openaire_id": "re3data_____::r3d100011077", + "datacite_name": "Inspire-HEP", + "official_name": "Inspire-HEP" + }, + "INIST.IFREMER": { + "openaire_id": "re3data_____::r3d100012965", + "datacite_name": "IFREMER-SISMER Portail de données marines", + "official_name": "IFREMER-SISMER Portail de données marines" + }, + "INIST.ILL": { + "openaire_id": "re3data_____::r3d100012072", + "datacite_name": "ILL Data Portal", + "official_name": "ILL Data Portal" + }, + "TIB.KIT-IMK": { + "openaire_id": "re3data_____::r3d100011956", + "datacite_name": "CARIBIC", + "official_name": "CARIBIC" + }, + "WWPX.INTR2": { + "openaire_id": "re3data_____::r3d100012347", + "datacite_name": "²Dok[§]", + "official_name": "²Dok[§]" + }, + "BL.IITA": { + "openaire_id": "re3data_____::r3d100012883", + "datacite_name": "International Institute of Tropical Agriculture datasets", + "official_name": "International Institute of Tropical Agriculture datasets" + }, + "TIB.IPK": { + "openaire_id": "re3data_____::r3d100011647", + "datacite_name": "IPK Gatersleben", + "official_name": "IPK Gatersleben" + }, + "IST.REX": { + "openaire_id": "re3data_____::r3d100012394", + "datacite_name": "IST Austria Research 
Explorer", + "official_name": "IST Austria Research Explorer" + }, + "GDCC.JHU": { + "openaire_id": "re3data_____::r3d100011836", + "datacite_name": "Johns Hopkins Data Archive Dataverse Network", + "official_name": "Johns Hopkins Data Archive Dataverse Network" + }, + "KAGGLE.KAGGLE": { + "openaire_id": "re3data_____::r3d100012705", + "datacite_name": "Kaggle", + "official_name": "Kaggle" + }, + "ESTDOI.KEEL": { + "openaire_id": "re3data_____::r3d100011941", + "datacite_name": "Center of Estonian Language Resources", + "official_name": "Center of Estonian Language Resources" + }, + "RICE.KINDER": { + "openaire_id": "re3data_____::r3d100012884", + "datacite_name": "Kinder Institute Urban Data Platform", + "official_name": "Kinder Institute Urban Data Platform" + }, + "DELFT.KNMI": { + "openaire_id": "re3data_____::r3d100011879", + "datacite_name": "KNMI Data Platform", + "official_name": "KNMI Data Platform" + }, + "LANDCARE.SOILS": { + "openaire_id": "re3data_____::r3d100010835", + "datacite_name": "Land Resource Information Systems Portal", + "official_name": "Land Resource Information Systems Portal" + }, + "LANDCARE.GENERAL": { + "openaire_id": "re3data_____::r3d100011662", + "datacite_name": "Landcare Research Data Repository", + "official_name": "Landcare Research Data Repository" + }, + "TIB.LDEO": { + "openaire_id": "re3data_____::r3d100012547", + "datacite_name": "Lamont-Doherty Core Repository", + "official_name": "Lamont-Doherty Core Repository" + }, + "ZBMED.LERNZDB": { + "openaire_id": "re3data_____::r3d100010066", + "datacite_name": "figshare", + "official_name": "figshare" + }, + "GESIS.NEPS": { + "openaire_id": "re3data_____::r3d100010736", + "datacite_name": "Nationales Bildungspanel", + "official_name": "Nationales Bildungspanel" + }, + "BL.LINCOLN": { + "openaire_id": "re3data_____::r3d100012407", + "datacite_name": "Lincoln repository", + "official_name": "Lincoln repository" + }, + "LDC.CATALOG": { + "openaire_id": "re3data_____::r3d100011940", 
+ "datacite_name": "Linguistic Data Consortium", + "official_name": "Linguistic Data Consortium" + }, + "ZBW.IFO": { + "openaire_id": "re3data_____::r3d100010201", + "datacite_name": "LMU-ifo Economics & Business Data Center", + "official_name": "LMU-ifo Economics & Business Data Center" + }, + "DK.SB": { + "openaire_id": "re3data_____::r3d100012617", + "datacite_name": "LOAR", + "official_name": "LOAR" + }, + "BL.LSHTM": { + "openaire_id": "re3data_____::r3d100011800", + "datacite_name": "LSHTM Data Compass", + "official_name": "LSHTM Data Compass" + }, + "BL.LBORO": { + "openaire_id": "re3data_____::r3d100012143", + "datacite_name": "Loughborough Data Repository", + "official_name": "Loughborough Data Repository" + }, + "DELFT.MAASTRO": { + "openaire_id": "re3data_____::r3d100011086", + "datacite_name": "CancerData.org", + "official_name": "CancerData.org" + }, + "OIBK.OHYCFA": { + "openaire_id": "re3data_____::r3d100013499", + "datacite_name": "Materials Data Repository", + "official_name": "Materials Data Repository" + }, + "MDW.REPOSITORY": { + "openaire_id": "re3data_____::r3d100012108", + "datacite_name": "mdw Repository", + "official_name": "mdw Repository" + }, + "ELSEVIER.MD": { + "openaire_id": "re3data_____::r3d100011868", + "datacite_name": "Mendeley Data", + "official_name": "Mendeley Data" + }, + "BL.MENDELEY": { + "openaire_id": "re3data_____::r3d100011868", + "datacite_name": "Mendeley Data", + "official_name": "Mendeley Data" + }, + "BKMX.AZJWZC": { + "openaire_id": "re3data_____::r3d100011394", + "datacite_name": "B2SHARE", + "official_name": "B2SHARE" + }, + "CSC.NRD": { + "openaire_id": "re3data_____::r3d100012157", + "datacite_name": "Fairdata IDA Research Data Storage Service", + "official_name": "Fairdata IDA Research Data Storage Service" + }, + "UMN.IPUMS": { + "openaire_id": "re3data_____::r3d100010794", + "datacite_name": "Minnesota Population Center", + "official_name": "Minnesota Population Center" + }, + "PHBI.REPO": { + 
"openaire_id": "re3data_____::r3d100010101", + "datacite_name": "MorphoBank", + "official_name": "MorphoBank" + }, + "TIB.UKON": { + "openaire_id": "re3data_____::r3d100010469", + "datacite_name": "Movebank Data Repository", + "official_name": "Movebank Data Repository" + }, + "INIST.HUMANUM": { + "openaire_id": "re3data_____::r3d100012102", + "datacite_name": "NAKALA", + "official_name": "NAKALA" + }, + "GDCC.NTU": { + "openaire_id": "re3data_____::r3d100012440", + "datacite_name": "DR-NTU (Data)", + "official_name": "DR-NTU (Data)" + }, + "CORNELL.NDACAN": { + "openaire_id": "re3data_____::r3d100011036", + "datacite_name": "National Data Archive on Child Abuse and Neglect", + "official_name": "National Data Archive on Child Abuse and Neglect" + }, + "NOAA.NCEI": { + "openaire_id": "re3data_____::r3d100011801", + "datacite_name": "NCEI", + "official_name": "NCEI" + }, + "GDCC.HARVARD-SLP": { + "openaire_id": "re3data_____::r3d100011861", + "datacite_name": "National Sleep Research Resource", + "official_name": "National Sleep Research Resource" + }, + "NSIDC.DATACTR": { + "openaire_id": "re3data_____::r3d100010110", + "datacite_name": "National Snow and Ice Data Center", + "official_name": "National Snow and Ice Data Center" + }, + "NUS.SB": { + "openaire_id": "re3data_____::r3d100012564", + "datacite_name": "ScholarBank@NUS", + "official_name": "ScholarBank@NUS" + }, + "BL.NHM": { + "openaire_id": "re3data_____::r3d100011675", + "datacite_name": "Natural History Museum, Data Portal", + "official_name": "Natural History Museum, Data Portal" + }, + "ESDIS.ORNL": { + "openaire_id": "re3data_____::r3d100000037", + "datacite_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics", + "official_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics" + }, + "INIST.OTELO": { + "openaire_id": "re3data_____::r3d100012505", + "datacite_name": "ORDaR", + "official_name": "ORDaR" + }, + 
"EUROP.ODIN": { + "openaire_id": "re3data_____::r3d100011378", + "datacite_name": "MatDB", + "official_name": "MatDB" + }, + "GDCC.ODUM-DV": { + "openaire_id": "re3data_____::r3d100000005", + "datacite_name": "UNC Dataverse", + "official_name": "UNC Dataverse" + }, + "OHSU.OHSU": { + "openaire_id": "re3data_____::r3d100012244", + "datacite_name": "OHSU Digital Commons", + "official_name": "OHSU Digital Commons" + }, + "KIM.OPENKIM": { + "openaire_id": "re3data_____::r3d100011864", + "datacite_name": "OpenKIM", + "official_name": "OpenKIM" + }, + "COS.OSF": { + "openaire_id": "re3data_____::r3d100011137", + "datacite_name": "Open Science Framework", + "official_name": "Open Science Framework" + }, + "SUL.OPENNEURO": { + "openaire_id": "re3data_____::r3d100010924", + "datacite_name": "OpenNeuro", + "official_name": "OpenNeuro" + }, + "BL.SHEF": { + "openaire_id": "re3data_____::r3d100012124", + "datacite_name": "ORDA - The University of Sheffield Research Data Catalogue and Repository", + "official_name": "ORDA - The University of Sheffield Research Data Catalogue and Repository" + }, + "BL.BROOKES": { + "openaire_id": "re3data_____::r3d100012929", + "datacite_name": "Oxford Brookes University: RADAR", + "official_name": "Oxford Brookes University: RADAR" + }, + "BL.OXDB": { + "openaire_id": "re3data_____::r3d100011653", + "datacite_name": "DataBank, Bodleian Libraries, University of Oxford", + "official_name": "DataBank, Bodleian Libraries, University of Oxford" + }, + "PANGAEA.REPOSITORY": { + "openaire_id": "re3data_____::r3d100010134", + "datacite_name": "PANGAEA", + "official_name": "PANGAEA" + }, + "NASAPDS.NASAPDS": { + "openaire_id": "re3data_____::r3d100010121", + "datacite_name": "PDS", + "official_name": "PDS" + }, + "BF.DISCOVER": { + "openaire_id": "re3data_____::r3d100013148", + "datacite_name": "Blackfynn Discover", + "official_name": "Blackfynn Discover" + }, + "MIT.PHYSIO": { + "openaire_id": "re3data_____::r3d100011561", + "datacite_name": 
"PhysioNet", + "official_name": "PhysioNet" + }, + "ZBMED.BIOFRESH": { + "openaire_id": "re3data_____::r3d100011651", + "datacite_name": "Freshwater Biodiversity Data Portal", + "official_name": "Freshwater Biodiversity Data Portal" + }, + "PDS.DATA": { + "openaire_id": "re3data_____::r3d100013015", + "datacite_name": "Project Data Sphere", + "official_name": "Project Data Sphere" + }, + "ESTDOI.QDB": { + "openaire_id": "re3data_____::r3d100013451", + "datacite_name": "QsarDB", + "official_name": "QsarDB" + }, + "INIST.ADISP": { + "openaire_id": "re3data_____::r3d100010494", + "datacite_name": "Quetelet PROGEDO Diffusion", + "official_name": "Quetelet PROGEDO Diffusion" + }, + "TIB.RADAR": { + "openaire_id": "re3data_____::r3d100012330", + "datacite_name": "RADAR", + "official_name": "RADAR" + }, + "UCHILE.DATAVERSE": { + "openaire_id": "re3data_____::r3d100013108", + "datacite_name": "Repositorio de Datos de Investigación de la Universidad de Chile", + "official_name": "Repositorio de Datos de Investigación de la Universidad de Chile" + }, + "UDR.RESEARCHDATA": { + "openaire_id": "re3data_____::r3d100013212", + "datacite_name": "Repositorio de datos de investigación de la Universidad del Rosario", + "official_name": "Repositorio de datos de investigación de la Universidad del Rosario" + }, + "GESIS.DIPF": { + "openaire_id": "re3data_____::r3d100010390", + "datacite_name": "Forschungsdatenzentrum Bildung", + "official_name": "Forschungsdatenzentrum Bildung" + }, + "RG.RG": { + "openaire_id": "re3data_____::r3d100012227", + "datacite_name": "ResearchGate Data", + "official_name": "ResearchGate Data" + }, + "INIST.RESIF": { + "openaire_id": "re3data_____::r3d100012222", + "datacite_name": "Résif Seismological Data Portal", + "official_name": "Résif Seismological Data Portal" + }, + "TIB.HZDR": { + "openaire_id": "re3data_____::r3d100012757", + "datacite_name": "RODARE", + "official_name": "RODARE" + }, + "OCUL.SPDV": { + "openaire_id": "re3data_____::r3d100010691", + 
"datacite_name": "Scholars Portal Dataverse", + "official_name": "Scholars Portal Dataverse" + }, + "PSU.SCHOLAR": { + "openaire_id": "re3data_____::r3d100010701", + "datacite_name": "ScholarSphere", + "official_name": "ScholarSphere" + }, + "TIB.BIKF": { + "openaire_id": "re3data_____::r3d100012379", + "datacite_name": "Senckenberg (meta) data portal", + "official_name": "Senckenberg (meta) data portal" + }, + "GESIS.SHARE": { + "openaire_id": "re3data_____::r3d100010430", + "datacite_name": "Survey of Health, Ageing and Retirement in Europe", + "official_name": "Survey of Health, Ageing and Retirement in Europe" + }, + "BL.HALLAM": { + "openaire_id": "re3data_____::r3d100011909", + "datacite_name": "Sheffield Hallam University Research Data Archive", + "official_name": "Sheffield Hallam University Research Data Archive" + }, + "ETHZ.SICAS": { + "openaire_id": "re3data_____::r3d100011560", + "datacite_name": "Sicas Medical Image Repository", + "official_name": "Sicas Medical Image Repository" + }, + "SUL.SIMTK": { + "openaire_id": "re3data_____::r3d100012486", + "datacite_name": "SimTK", + "official_name": "SimTK" + }, + "SI.SI": { + "openaire_id": "re3data_____::r3d100012274", + "datacite_name": "Smithsonian Research Online", + "official_name": "Smithsonian Research Online" + }, + "CONCOR.KCYDCU": { + "openaire_id": "re3data_____::r3d100012818", + "datacite_name": "Spectrum Research Repository", + "official_name": "Spectrum Research Repository" + }, + "SUL.SDR": { + "openaire_id": "re3data_____::r3d100010710", + "datacite_name": "Stanford Digital Repository", + "official_name": "Stanford Digital Repository" + }, + "SND.SU": { + "openaire_id": "re3data_____::r3d100012147", + "datacite_name": "Stockholm University Figshare Repository", + "official_name": "Stockholm University Figshare Repository" + }, + "INIST.CDS": { + "openaire_id": "re3data_____::r3d100010584", + "datacite_name": "Strasbourg Astronomical Data Center", + "official_name": "Strasbourg Astronomical 
Data Center" + }, + "DELFT.SURFSARA": { + "openaire_id": "re3data_____::r3d100013084", + "datacite_name": "SURF Data Repository", + "official_name": "SURF Data Repository" + }, + "SND.SMHI": { + "openaire_id": "re3data_____::r3d100011776", + "datacite_name": "Swedish Meteorological and Hydrological Institute open data", + "official_name": "Swedish Meteorological and Hydrological Institute open data" + }, + "SND.SND": { + "openaire_id": "re3data_____::r3d100010146", + "datacite_name": "Swedish National Data Service", + "official_name": "Swedish National Data Service" + }, + "SAGEBIO.SYNAPSE": { + "openaire_id": "re3data_____::r3d100011894", + "datacite_name": "Synapse", + "official_name": "Synapse" + }, + "GDCC.SYR-QDR": { + "openaire_id": "re3data_____::r3d100011038", + "datacite_name": "Qualitative Data Repository", + "official_name": "Qualitative Data Repository" + }, + "FZJ.TERENO": { + "openaire_id": "re3data_____::r3d100011471", + "datacite_name": "TERENO Data Discovery Portal", + "official_name": "TERENO Data Discovery Portal" + }, + "TUW.TETHYS": { + "openaire_id": "re3data_____::r3d100013400", + "datacite_name": "Tethys", + "official_name": "Tethys" + }, + "GESIS.AUSSDA": { + "openaire_id": "re3data_____::r3d100010483", + "datacite_name": "AUSSDA Dataverse", + "official_name": "AUSSDA Dataverse" + }, + "CCDC.CSD": { + "openaire_id": "re3data_____::r3d100010197", + "datacite_name": "The Cambridge Structural Database", + "official_name": "The Cambridge Structural Database" + }, + "SML.TCIA": { + "openaire_id": "re3data_____::r3d100011559", + "datacite_name": "The Cancer Imaging Archive", + "official_name": "The Cancer Imaging Archive" + }, + "SI.CDA": { + "openaire_id": "re3data_____::r3d100010035", + "datacite_name": "The Chandra Data Archive", + "official_name": "The Chandra Data Archive" + }, + "HLQC.ZNXELI": { + "openaire_id": "re3data_____::r3d100013029", + "datacite_name": "TUdatalib", + "official_name": "TUdatalib" + }, + "TIB.TUHH": { + "openaire_id": 
"re3data_____::r3d100013076", + "datacite_name": "TUHH Open Research - Research Data TUHH", + "official_name": "TUHH Open Research - Research Data TUHH" + }, + "BL.UEL": { + "openaire_id": "re3data_____::r3d100012414", + "datacite_name": "UEL Research Repository", + "official_name": "UEL Research Repository" + }, + "ARFM.UFZDRP": { + "openaire_id": "re3data_____::r3d100013674", + "datacite_name": "Datenrechercheportal UFZ", + "official_name": "Datenrechercheportal UFZ" + }, + "BL.UKDA": { + "openaire_id": "re3data_____::r3d100010215", + "datacite_name": "UK Data Archive", + "official_name": "UK Data Archive" + }, + "GDCC.ODUM-LIBRARY": { + "openaire_id": "re3data_____::r3d100000005", + "datacite_name": "UNC Dataverse", + "official_name": "UNC Dataverse" + }, + "CRUI.UNIBO": { + "openaire_id": "re3data_____::r3d100012604", + "datacite_name": "AMS Acta", + "official_name": "AMS Acta" + }, + "LMU.UB": { + "openaire_id": "re3data_____::r3d100010731", + "datacite_name": "Open Data LMU", + "official_name": "Open Data LMU" + }, + "INIST.IFSTTAR": { + "openaire_id": "re3data_____::r3d100013062", + "datacite_name": "Data Univ Gustave Eiffel", + "official_name": "Data Univ Gustave Eiffel" + }, + "BL.UCLD": { + "openaire_id": "re3data_____::r3d100012417", + "datacite_name": "UCL Discovery", + "official_name": "UCL Discovery" + }, + "NZAU.DATA": { + "openaire_id": "re3data_____::r3d100012110", + "datacite_name": "University of Auckland Data Repository", + "official_name": "University of Auckland Data Repository" + }, + "BL.BATH": { + "openaire_id": "re3data_____::r3d100011947", + "datacite_name": "University of Bath Research Data Archive", + "official_name": "University of Bath Research Data Archive" + }, + "BL.BRISTOL": { + "openaire_id": "re3data_____::r3d100011099", + "datacite_name": "data.bris Research Data Repository", + "official_name": "data.bris Research Data Repository" + }, + "FIGSHARE.UCT": { + "openaire_id": "re3data_____::r3d100012633", + "datacite_name": 
"ZivaHub", + "official_name": "ZivaHub" + }, + "BL.UCLAN": { + "openaire_id": "re3data_____::r3d100012019", + "datacite_name": "UCLanData", + "official_name": "UCLanData" + }, + "BL.ED": { + "openaire_id": "re3data_____::r3d100000047", + "datacite_name": "Edinburgh DataShare", + "official_name": "Edinburgh DataShare" + }, + "BL.ESSEX": { + "openaire_id": "re3data_____::r3d100012405", + "datacite_name": "Research Data at Essex", + "official_name": "Research Data at Essex" + }, + "BL.EXETER": { + "openaire_id": "re3data_____::r3d100011202", + "datacite_name": "Open Research Exeter", + "official_name": "Open Research Exeter" + }, + "BL.HERTS": { + "openaire_id": "re3data_____::r3d100013116", + "datacite_name": "University of Hertfordshire Research Archive", + "official_name": "University of Hertfordshire Research Archive" + }, + "NKN.NKN": { + "openaire_id": "re3data_____::r3d100011587", + "datacite_name": "Northwest Knowledge Network", + "official_name": "Northwest Knowledge Network" + }, + "BL.LEEDS": { + "openaire_id": "re3data_____::r3d100011945", + "datacite_name": "Research Data Leeds Repository", + "official_name": "Research Data Leeds Repository" + }, + "UNIMELB.REPO1": { + "openaire_id": "re3data_____::r3d100012145", + "datacite_name": "melbourne.figshare.com", + "official_name": "melbourne.figshare.com" + }, + "BL.READING": { + "openaire_id": "re3data_____::r3d100012064", + "datacite_name": "University of Reading Research Data Archive", + "official_name": "University of Reading Research Data Archive" + }, + "BL.SALFORD": { + "openaire_id": "re3data_____::r3d100012144", + "datacite_name": "University of Salford Data Repository", + "official_name": "University of Salford Data Repository" + }, + "BL.SOTON": { + "openaire_id": "re3data_____::r3d100011245", + "datacite_name": "University of Southampton Institutional Research Repository", + "official_name": "University of Southampton Institutional Research Repository" + }, + "ARDCX.USQ": { + "openaire_id": 
"re3data_____::r3d100011638", + "datacite_name": "University of Southern Queensland research data collection", + "official_name": "University of Southern Queensland research data collection" + }, + "BL.STANDREW": { + "openaire_id": "re3data_____::r3d100012411", + "datacite_name": "St Andrews Research portal - Research Data", + "official_name": "St Andrews Research portal - Research Data" + }, + "BL.STRATH": { + "openaire_id": "re3data_____::r3d100012412", + "datacite_name": "University of Strathclyde KnowledgeBase Datasets", + "official_name": "University of Strathclyde KnowledgeBase Datasets" + }, + "BL.SURREY": { + "openaire_id": "re3data_____::r3d100012232", + "datacite_name": "Surrey Research Insight", + "official_name": "Surrey Research Insight" + }, + "USDA.USDA": { + "openaire_id": "re3data_____::r3d100011890", + "datacite_name": "Ag Data Commons", + "official_name": "Ag Data Commons" + }, + "USGS.PROD": { + "openaire_id": "re3data_____::r3d100010054", + "datacite_name": "U.S. Geological Survey", + "official_name": "U.S. 
Geological Survey" + }, + "DELFT.UU": { + "openaire_id": "re3data_____::r3d100012623", + "datacite_name": "Yoda", + "official_name": "Yoda" + }, + "VT.VTECHDATA": { + "openaire_id": "re3data_____::r3d100012601", + "datacite_name": "Virginia Tech Data Repository", + "official_name": "Virginia Tech Data Repository" + }, + "JCVI.EIVBWB": { + "openaire_id": "re3data_____::r3d100011931", + "datacite_name": "Virus Pathogen Resource", + "official_name": "Virus Pathogen Resource" + }, + "VIVLI.SEARCH": { + "openaire_id": "re3data_____::r3d100012823", + "datacite_name": "Vivli", + "official_name": "Vivli" + }, + "DELFT.VLIZ": { + "openaire_id": "re3data_____::r3d100010661", + "datacite_name": "Flanders Marine Institute", + "official_name": "Flanders Marine Institute" + }, + "WH.WHOAS": { + "openaire_id": "re3data_____::r3d100010423", + "datacite_name": "Woods Hole Open Access Server", + "official_name": "Woods Hole Open Access Server" + }, + "DKRZ.WDCC": { + "openaire_id": "re3data_____::r3d100010299", + "datacite_name": "World Data Center for Climate", + "official_name": "World Data Center for Climate" + }, + "ETHZ.WGMS": { + "openaire_id": "re3data_____::r3d100010627", + "datacite_name": "World Glacier Monitoring Service", + "official_name": "World Glacier Monitoring Service" + }, + "ZBW.ZBW-JDA": { + "openaire_id": "re3data_____::r3d100012190", + "datacite_name": "ZBW Journal Data Archive", + "official_name": "ZBW Journal Data Archive" + }, + "CERN.ZENODO": { + "openaire_id": "re3data_____::r3d100010468", + "datacite_name": "Zenodo", + "official_name": "Zenodo" + }, + "ZBW.ZEW": { + "openaire_id": "re3data_____::r3d100010399", + "datacite_name": "ZEW Forschungsdatenzentrum", + "official_name": "ZEW Forschungsdatenzentrum" + } } \ No newline at end of file From 393a4ee956fdd3ea7dea2ee40eebfa4df4931ce3 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Tue, 15 Feb 2022 12:56:50 +0200 Subject: [PATCH 083/162] fixed yet another typo... 
--- .../oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 9e4edb44a..e75bdffc8 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -130,7 +130,7 @@ compute stats TARGET.indi_result_has_cc_licence; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_has_cc_licence_url; -create view TARGET.indi_funder_country_collab stored as parquet as select * from SOURCE.indi_funder_country_collab; +create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab; create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_with_orcid; From 58c59f46eb2273a65f2fe71f4e212dd202b8e097 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 17 Feb 2022 10:21:09 +0200 Subject: [PATCH 084/162] Added Sprint 6 --- .../scripts/step16-createIndicatorsTables.sql | 181 +++++++++++++++++- 1 file changed, 180 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 9f11fa49d..e5bc588be 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -177,4 +177,183 @@ on tmp.id=ri.id; ---- Sprint 5 ---- create table indi_result_no_of_copies stored as parquet as -select id, count(id) as number_of_copies from result_instance group by id; \ No newline at end of file +select id, count(id) as number_of_copies from result_instance group by id; + +---- Sprint 6 ---- +create table indi_pub_gold_oa_new stored as parquet as +WITH gold_oa AS (SELECT issn_l, journal_is_in_doaj,journal_is_oa, issn_1 as issn +FROM stats_ext.oa_journals +WHERE issn_1 != "" +UNION ALL +SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_2 as issn +FROM stats_ext.oa_journals +WHERE issn_2 != "" ), +issn AS (SELECT * FROM +(SELECT id, issn_printed as issn +FROM datasource WHERE issn_printed IS NOT NULL +UNION +SELECT id, issn_online as issn +FROM datasource WHERE issn_online IS NOT NULL) as issn +WHERE LENGTH(issn) > 7) +SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold +FROM publication_datasources pd +LEFT OUTER JOIN +(SELECT pd.id, 1 as is_gold FROM publication_datasources pd +JOIN issn on issn.id=pd.datasource +JOIN gold_oa on issn.issn = gold_oa.issn) tmp +ON pd.id=tmp.id; + +create table indi_datasets_gold_oa_new stored as parquet as +WITH gold_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn +FROM stats_ext.oa_journals +WHERE issn_1 != "" +UNION +ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn +FROM stats_ext.oa_journals +WHERE issn_2 != "" ), +issn AS (SELECT * +FROM (SELECT id,issn_printed as issn +FROM datasource WHERE issn_printed IS NOT NULL 
+UNION +SELECT id, issn_online as issn +FROM datasource +WHERE issn_online IS NOT NULL ) as issn +WHERE LENGTH(issn) > 7) +SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold +FROM dataset_datasources pd +LEFT OUTER JOIN +(SELECT pd.id, 1 as is_gold FROM dataset_datasources pd +JOIN issn on issn.id=pd.datasource +JOIN gold_oa on issn.issn = gold_oa.issn) tmp +ON pd.id=tmp.id; + +create table indi_software_gold_oa_new stored as parquet as +WITH gold_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn +FROM stats_ext.oa_journals +WHERE issn_1 != "" +UNION +ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn +FROM stats_ext.oa_journals +WHERE issn_2 != "" ), +issn AS (SELECT * +FROM (SELECT id,issn_printed as issn +FROM datasource WHERE issn_printed IS NOT NULL +UNION +SELECT id, issn_online as issn +FROM datasource +WHERE issn_online IS NOT NULL ) as issn +WHERE LENGTH(issn) > 7) +SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold +FROM software_datasources pd +LEFT OUTER JOIN +(SELECT pd.id, 1 as is_gold FROM software_datasources pd +JOIN issn on issn.id=pd.datasource +JOIN gold_oa on issn.issn = gold_oa.issn) tmp +ON pd.id=tmp.id; + +create table indi_org_findable stored as parquet as +with result_with_pid as +(select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro +join result_pids rp on rp.id=ro.id +group by ro.organization), +result_has_abstract as +(select ro.organization organization, count(distinct rp.id) no_result_with_abstract from result_organization ro +join result rp on rp.id=ro.id where rp.abstract=true +group by ro.organization), +allresults as (select organization, count(distinct id) no_allresults from result_organization +group by organization), +result_with_pid_share as( +select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults pid_share +from allresults +join result_with_pid on 
result_with_pid.organization=allresults.organization), +result_with_abstract_share as( +select allresults.organization, result_has_abstract.no_result_with_abstract/allresults.no_allresults abstract_share +from allresults +join result_has_abstract on result_has_abstract.organization=allresults.organization) +select allresults.organization, coalesce((pid_share+abstract_share)/2,pid_share) org_findable +from allresults +join result_with_pid_share on result_with_pid_share.organization=allresults.organization +left outer join ( +select organization, abstract_share from result_with_abstract_share +)tmp +on tmp.organization=allresults.organization; + +create table indi_org_openess stored as parquet as +WITH datasets_oa as ( +SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa_new dg +join openaire_prod_stats.result_organization ro on dg.id=ro.id +join openaire_prod_stats.dataset ds on dg.id=ds.id +WHERE dg.is_gold=1 +group by ro.organization), +software_oa as ( +SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa_new dg +join openaire_prod_stats.result_organization ro on dg.id=ro.id +join openaire_prod_stats.software ds on dg.id=ds.id +WHERE dg.is_gold=1 +group by ro.organization), +pubs_oa as ( +SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa_new dg +join openaire_prod_stats.result_organization ro on dg.id=ro.id +join openaire_prod_stats.publication ds on dg.id=ds.id +where dg.is_gold=1 +group by ro.organization), +allpubs as ( +SELECT ro.organization organization, count(ro.id) no_allpubs FROM result_organization ro +join openaire_prod_stats.publication ps on ps.id=ro.id +group by ro.organization), +alldatasets as ( +SELECT ro.organization organization, count(ro.id) no_alldatasets FROM result_organization ro +join openaire_prod_stats.dataset ps on ps.id=ro.id +group by ro.organization), +allsoftware as ( +SELECT ro.organization organization, count(ro.id) no_allsoftware FROM result_organization ro 
+join openaire_prod_stats.software ps on ps.id=ro.id +group by ro.organization), +allpubsshare as ( +select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs +join pubs_oa on allpubs.organization=pubs_oa.organization), +alldatasetssshare as ( +select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets c +from alldatasets +join datasets_oa on alldatasets.organization=datasets_oa.organization), +allsoftwaresshare as ( +select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s +from allsoftware +join software_oa on allsoftware.organization=software_oa.organization) +select allpubsshare.organization, coalesce((c+p+s)/3, p) org_openess FROM allpubsshare +left outer join (select organization,c from +alldatasetssshare) tmp +on tmp.organization=allpubsshare.organization +left outer join (select organization,s from +allsoftwaresshare) tmp1 +on tmp1.organization=allpubsshare.organization; + +create table indi_pub_hybrid_oa_with_cc stored as parquet as +WITH hybrid_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn +FROM stats_ext.plan_s_jn +WHERE issn_print != "" +UNION ALL +SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn +FROM stats_ext.plan_s_jn +WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)), +issn AS (SELECT * FROM +(SELECT id, issn_printed as issn +FROM datasource +WHERE issn_printed IS NOT NULL +UNION +SELECT id,issn_online as issn +FROM datasource +WHERE issn_online IS NOT NULL ) as issn +WHERE LENGTH(issn) > 7) +SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa +FROM publication_datasources pd +LEFT OUTER JOIN ( +SELECT pd.id, 1 as is_hybrid_oa from publication_datasources pd +JOIN datasource d on d.id=pd.datasource +JOIN issn on issn.id=pd.datasource +JOIN hybrid_oa ON issn.issn = hybrid_oa.issn +JOIN indi_result_has_cc_licence cc on pd.id=cc.id +where cc.has_cc_license=1) tmp +on 
pd.id=tmp.id; + + From d3f03abd511f6c7944e097158874ab62a26ec2de Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 18 Feb 2022 11:11:17 +0100 Subject: [PATCH 085/162] fixed wrong json path --- .../main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala | 2 +- .../src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala index 471149b25..b09ffcfd5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/AbstractRestClient.scala @@ -49,7 +49,7 @@ abstract class AbstractRestClient extends Iterator[String] { } private def doHTTPRequest[A <: HttpUriRequest](r: A): String = { - val timeout = 60; // seconds + val timeout = 600; // seconds val config = RequestConfig .custom() .setConnectTimeout(timeout * 1000) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala index cb021925a..6e2cc798c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/ImportDatacite.scala @@ -46,7 +46,7 @@ object ImportDatacite { Source .fromInputStream( getClass.getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json" + "/eu/dnetlib/dhp/datacite/import_from_api.json" ) ) .mkString From cf8443780e11755eface8a03157ee2f1f0f79864 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Feb 2022 15:17:48 +0100 Subject: [PATCH 086/162] added processingchargeamount to the result view --- 
.../graph/hive/oozie_app/lib/scripts/postprocessing.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index 7eaec2e2c..4d1fbcd9f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -1,13 +1,13 @@ DROP VIEW IF EXISTS ${hiveDbName}.result; CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.publication p + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount from ${hiveDbName}.publication p union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.dataset d + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, 
measures, processingchargeamount from ${hiveDbName}.dataset d union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.software s + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount from ${hiveDbName}.software s union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.otherresearchproduct o; + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount from ${hiveDbName}.otherresearchproduct o; ANALYZE TABLE ${hiveDbName}.datasource COMPUTE STATISTICS; ANALYZE TABLE ${hiveDbName}.organization COMPUTE STATISTICS; From 401dd38074d315b7a2e6bc76aae6e843b5fccc09 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Feb 2022 15:19:15 +0100 Subject: [PATCH 087/162] code formatting --- .../CreateActionSetSparkJob.java | 17 ++--- .../actionmanager/opencitations/ReadCOCI.java | 2 +- .../opencitations/model/COCI.java | 2 - .../opencitations/ReadCOCITest.java | 72 +++++++++---------- 4 files changed, 45 insertions(+), 48 deletions(-) diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index f230a7fd7..61bc3fbca 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -112,18 +112,19 @@ public class CreateActionSetSparkJob implements Serializable { final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); - if(!citing.equals(cited)){ + if (!citing.equals(cited)) { relationList - .addAll( - getRelations( - citing, - cited)); + .addAll( + getRelations( + citing, + cited)); if (duplicate && value.getCiting().endsWith(".refs")) { citing = ID_PREFIX + IdentifierFactory - .md5( - CleaningFunctions - .normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); + .md5( + CleaningFunctions + .normalizePidValue( + "doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); relationList.addAll(getRelations(citing, cited)); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java index 3fd6e8e9a..4293ca187 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -72,7 +72,7 @@ public class ReadCOCI implements Serializable { String outputPath, String delimiter) throws IOException { - for(String inputFile : inputFiles){ + for (String inputFile : inputFiles) { String p_string = workingPath + "/" + 
inputFile + ".gz"; Dataset cociData = spark diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java index ce087ce22..c1ef1abad 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/model/COCI.java @@ -12,7 +12,6 @@ public class COCI implements Serializable { private String cited; - public String getOci() { return oci; } @@ -37,5 +36,4 @@ public class COCI implements Serializable { this.cited = cited; } - } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java index 629e95c8e..3b416caf2 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -76,53 +76,51 @@ public class ReadCOCITest { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); fs - .copyFromLocalFile( - false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz") - .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz")); + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz")); fs - .copyFromLocalFile( - false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz") - .getPath()), - new 
org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz")); + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz")); fs - .copyFromLocalFile( - false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz") - .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz")); + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz")); fs - .copyFromLocalFile( - false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz") - .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz")); + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz")); fs - .copyFromLocalFile( - false, new org.apache.hadoop.fs.Path(getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz") - .getPath()), - new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz")); + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz")); ReadCOCI - .main( - new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-workingPath", - workingDir.toString() + "/COCI", - "-outputPath", - workingDir.toString() + "/COCI_json/", - "-inputFile", "input1;input2;input3;input4;input5" - }); - - + .main( + 
new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-workingPath", + workingDir.toString() + "/COCI", + "-outputPath", + workingDir.toString() + "/COCI_json/", + "-inputFile", "input1;input2;input3;input4;input5" + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); From 99f5b144692afe2d9f4df3e2431eeaa06ccc2faf Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Feb 2022 15:20:57 +0100 Subject: [PATCH 088/162] [graph raw] invisible records stored among the raw graph rather than the claimed subgraph --- .../eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 307e26267..c6cc46c0f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -466,7 +466,7 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims,${contentPath}/oaf_records_invisible + --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims --targetPath${workingDir}/entities_claim --isLookupUrl${isLookupUrl} --shouldHashId${shouldHashId} @@ -515,7 +515,7 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePaths${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_records,${contentPath}/odf_records,${contentPath}/oaf_records_hdfs,${contentPath}/odf_records_hdfs + 
--sourcePaths${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_records,${contentPath}/odf_records,${contentPath}/oaf_records_hdfs,${contentPath}/odf_records_hdfs,${contentPath}/oaf_records_invisible --targetPath${workingDir}/entities --isLookupUrl${isLookupUrl} --shouldHashId${shouldHashId} From 0bff45e739c174a0a925f56c4bc847e638f0771f Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 18 Feb 2022 17:11:23 +0200 Subject: [PATCH 089/162] added sprint 6 indicators to monitor db --- .../scripts/step16-createIndicatorsTables.sql | 321 +++++++++--------- .../scripts/step20-createMonitorDB.sql | 9 + 2 files changed, 162 insertions(+), 168 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index e5bc588be..1e2ac69f3 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -31,17 +31,6 @@ join datasource d on d.id = ri.collectedfrom where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; -create table indi_pub_gold_oa stored as parquet as -select distinct p.id, coalesce(gold_oa, 0) as gold_oa -from publication p -left outer join ( -select p.id, 1 as gold_oa -from publication p -join result_instance ri on ri.id = p.id -join datasource on datasource.id = ri.hostedby -where datasource.id like '%doajarticles%') tmp -on p.id= tmp.id; - ---- Sprint 2 ---- create table indi_result_has_cc_licence stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -144,16 +133,6 @@ join stats_ext.plan_s_jn ps 
where (ps.issn_print=d.issn_printed and ps.issn_onli and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp on pd.id=tmp.id; -create table indi_is_gold_oa stored as parquet as -(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa -from publication_datasources pd -left outer join ( -select pd.id, 1 as gold_oa from publication_datasources pd -join datasource d on d.id=pd.datasource -join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) -where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp -on pd.id=tmp.id); - create table indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative from publication pd @@ -180,180 +159,186 @@ create table indi_result_no_of_copies stored as parquet as select id, count(id) as number_of_copies from result_instance group by id; ---- Sprint 6 ---- -create table indi_pub_gold_oa_new stored as parquet as -WITH gold_oa AS (SELECT issn_l, journal_is_in_doaj,journal_is_oa, issn_1 as issn -FROM stats_ext.oa_journals -WHERE issn_1 != "" -UNION ALL -SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_2 as issn -FROM stats_ext.oa_journals -WHERE issn_2 != "" ), -issn AS (SELECT * FROM -(SELECT id, issn_printed as issn -FROM datasource WHERE issn_printed IS NOT NULL -UNION -SELECT id, issn_online as issn -FROM datasource WHERE issn_online IS NOT NULL) as issn -WHERE LENGTH(issn) > 7) +create table indi_pub_gold_oa stored as parquet as +WITH gold_oa AS ( + SELECT issn_l, journal_is_in_doaj,journal_is_oa, issn_1 as issn + FROM stats_ext.oa_journals + WHERE issn_1 != "" + UNION ALL + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_2 as issn + FROM stats_ext.oa_journals + WHERE issn_2 != "" ), +issn AS ( + SELECT * FROM + (SELECT id, issn_printed as issn + FROM datasource WHERE issn_printed IS NOT NULL + UNION + SELECT id, issn_online as issn + FROM datasource WHERE issn_online IS NOT NULL) as issn + WHERE 
LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold FROM publication_datasources pd -LEFT OUTER JOIN -(SELECT pd.id, 1 as is_gold FROM publication_datasources pd -JOIN issn on issn.id=pd.datasource -JOIN gold_oa on issn.issn = gold_oa.issn) tmp -ON pd.id=tmp.id; +LEFT OUTER JOIN ( + SELECT pd.id, 1 as is_gold FROM publication_datasources pd + JOIN issn on issn.id=pd.datasource + JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; -create table indi_datasets_gold_oa_new stored as parquet as -WITH gold_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn -FROM stats_ext.oa_journals -WHERE issn_1 != "" -UNION -ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn -FROM stats_ext.oa_journals -WHERE issn_2 != "" ), -issn AS (SELECT * -FROM (SELECT id,issn_printed as issn -FROM datasource WHERE issn_printed IS NOT NULL -UNION -SELECT id, issn_online as issn -FROM datasource -WHERE issn_online IS NOT NULL ) as issn -WHERE LENGTH(issn) > 7) +create table indi_datasets_gold_oa stored as parquet as +WITH gold_oa AS ( + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn + FROM stats_ext.oa_journals + WHERE issn_1 != "" + UNION + ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn + FROM stats_ext.oa_journals + WHERE issn_2 != "" ), +issn AS ( + SELECT * + FROM ( + SELECT id,issn_printed as issn + FROM datasource + WHERE issn_printed IS NOT NULL + UNION + SELECT id, issn_online as issn + FROM datasource + WHERE issn_online IS NOT NULL ) as issn + WHERE LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold FROM dataset_datasources pd -LEFT OUTER JOIN -(SELECT pd.id, 1 as is_gold FROM dataset_datasources pd -JOIN issn on issn.id=pd.datasource -JOIN gold_oa on issn.issn = gold_oa.issn) tmp -ON pd.id=tmp.id; +LEFT OUTER JOIN ( + SELECT pd.id, 1 as is_gold FROM dataset_datasources pd + JOIN issn on issn.id=pd.datasource + JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON 
pd.id=tmp.id; -create table indi_software_gold_oa_new stored as parquet as -WITH gold_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn -FROM stats_ext.oa_journals -WHERE issn_1 != "" -UNION -ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn -FROM stats_ext.oa_journals -WHERE issn_2 != "" ), -issn AS (SELECT * -FROM (SELECT id,issn_printed as issn -FROM datasource WHERE issn_printed IS NOT NULL -UNION -SELECT id, issn_online as issn -FROM datasource -WHERE issn_online IS NOT NULL ) as issn -WHERE LENGTH(issn) > 7) +create table indi_software_gold_oa stored as parquet as +WITH gold_oa AS ( + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn + FROM stats_ext.oa_journals + WHERE issn_1 != "" + UNION + ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn + FROM stats_ext.oa_journals + WHERE issn_2 != "" ), +issn AS ( + SELECT * + FROM ( + SELECT id,issn_printed as issn + FROM datasource + WHERE issn_printed IS NOT NULL + UNION + SELECT id, issn_online as issn + FROM datasource + WHERE issn_online IS NOT NULL ) as issn + WHERE LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold FROM software_datasources pd -LEFT OUTER JOIN -(SELECT pd.id, 1 as is_gold FROM software_datasources pd -JOIN issn on issn.id=pd.datasource -JOIN gold_oa on issn.issn = gold_oa.issn) tmp -ON pd.id=tmp.id; +LEFT OUTER JOIN ( + SELECT pd.id, 1 as is_gold FROM software_datasources pd + JOIN issn on issn.id=pd.datasource + JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; create table indi_org_findable stored as parquet as -with result_with_pid as -(select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro -join result_pids rp on rp.id=ro.id -group by ro.organization), -result_has_abstract as -(select ro.organization organization, count(distinct rp.id) no_result_with_abstract from result_organization ro -join result rp on rp.id=ro.id where 
rp.abstract=true -group by ro.organization), -allresults as (select organization, count(distinct id) no_allresults from result_organization -group by organization), -result_with_pid_share as( -select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults pid_share -from allresults -join result_with_pid on result_with_pid.organization=allresults.organization), -result_with_abstract_share as( -select allresults.organization, result_has_abstract.no_result_with_abstract/allresults.no_allresults abstract_share -from allresults -join result_has_abstract on result_has_abstract.organization=allresults.organization) +with result_with_pid as ( + select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro + join result_pids rp on rp.id=ro.id + group by ro.organization), +result_has_abstract as ( + select ro.organization organization, count(distinct rp.id) no_result_with_abstract from result_organization ro + join result rp on rp.id=ro.id where rp.abstract=true + group by ro.organization), +allresults as ( + select organization, count(distinct id) no_allresults from result_organization + group by organization), +result_with_pid_share as ( + select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults pid_share + from allresults + join result_with_pid on result_with_pid.organization=allresults.organization), +result_with_abstract_share as ( + select allresults.organization, result_has_abstract.no_result_with_abstract/allresults.no_allresults abstract_share + from allresults + join result_has_abstract on result_has_abstract.organization=allresults.organization) select allresults.organization, coalesce((pid_share+abstract_share)/2,pid_share) org_findable from allresults join result_with_pid_share on result_with_pid_share.organization=allresults.organization left outer join ( -select organization, abstract_share from result_with_abstract_share -)tmp -on 
tmp.organization=allresults.organization; + select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization; create table indi_org_openess stored as parquet as WITH datasets_oa as ( -SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa_new dg -join openaire_prod_stats.result_organization ro on dg.id=ro.id -join openaire_prod_stats.dataset ds on dg.id=ds.id -WHERE dg.is_gold=1 -group by ro.organization), + SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa_new dg + join openaire_prod_stats.result_organization ro on dg.id=ro.id + join openaire_prod_stats.dataset ds on dg.id=ds.id + WHERE dg.is_gold=1 + group by ro.organization), software_oa as ( -SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa_new dg -join openaire_prod_stats.result_organization ro on dg.id=ro.id -join openaire_prod_stats.software ds on dg.id=ds.id -WHERE dg.is_gold=1 -group by ro.organization), + SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa_new dg + join openaire_prod_stats.result_organization ro on dg.id=ro.id + join openaire_prod_stats.software ds on dg.id=ds.id + WHERE dg.is_gold=1 + group by ro.organization), pubs_oa as ( -SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa_new dg -join openaire_prod_stats.result_organization ro on dg.id=ro.id -join openaire_prod_stats.publication ds on dg.id=ds.id -where dg.is_gold=1 -group by ro.organization), + SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa_new dg + join openaire_prod_stats.result_organization ro on dg.id=ro.id + join openaire_prod_stats.publication ds on dg.id=ds.id + where dg.is_gold=1 + group by ro.organization), allpubs as ( -SELECT ro.organization organization, count(ro.id) no_allpubs FROM result_organization ro -join openaire_prod_stats.publication ps on ps.id=ro.id -group by ro.organization), + SELECT ro.organization organization, 
count(ro.id) no_allpubs FROM result_organization ro + join openaire_prod_stats.publication ps on ps.id=ro.id + group by ro.organization), alldatasets as ( -SELECT ro.organization organization, count(ro.id) no_alldatasets FROM result_organization ro -join openaire_prod_stats.dataset ps on ps.id=ro.id -group by ro.organization), + SELECT ro.organization organization, count(ro.id) no_alldatasets FROM result_organization ro + join openaire_prod_stats.dataset ps on ps.id=ro.id + group by ro.organization), allsoftware as ( -SELECT ro.organization organization, count(ro.id) no_allsoftware FROM result_organization ro -join openaire_prod_stats.software ps on ps.id=ro.id -group by ro.organization), + SELECT ro.organization organization, count(ro.id) no_allsoftware FROM result_organization ro + join openaire_prod_stats.software ps on ps.id=ro.id + group by ro.organization), allpubsshare as ( -select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs -join pubs_oa on allpubs.organization=pubs_oa.organization), + select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.organization=pubs_oa.organization), alldatasetssshare as ( -select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets c -from alldatasets -join datasets_oa on alldatasets.organization=datasets_oa.organization), + select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets c + from alldatasets + join datasets_oa on alldatasets.organization=datasets_oa.organization), allsoftwaresshare as ( -select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s -from allsoftware -join software_oa on allsoftware.organization=software_oa.organization) -select allpubsshare.organization, coalesce((c+p+s)/3, p) org_openess FROM allpubsshare -left outer join (select organization,c from -alldatasetssshare) tmp -on tmp.organization=allpubsshare.organization -left outer join (select 
organization,s from -allsoftwaresshare) tmp1 -on tmp1.organization=allpubsshare.organization; + select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s + from allsoftware + join software_oa on allsoftware.organization=software_oa.organization) +select allpubsshare.organization, coalesce((c+p+s)/3, p) org_openess +FROM allpubsshare +left outer join ( + select organization,c from + alldatasetssshare) tmp on tmp.organization=allpubsshare.organization +left outer join ( + select organization,s from allsoftwaresshare) tmp1 on tmp1.organization=allpubsshare.organization; create table indi_pub_hybrid_oa_with_cc stored as parquet as -WITH hybrid_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn -FROM stats_ext.plan_s_jn -WHERE issn_print != "" -UNION ALL -SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn -FROM stats_ext.plan_s_jn -WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)), -issn AS (SELECT * FROM -(SELECT id, issn_printed as issn -FROM datasource -WHERE issn_printed IS NOT NULL -UNION -SELECT id,issn_online as issn -FROM datasource -WHERE issn_online IS NOT NULL ) as issn -WHERE LENGTH(issn) > 7) +WITH hybrid_oa AS ( + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn + FROM stats_ext.plan_s_jn + WHERE issn_print != "" + UNION ALL + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn + FROM stats_ext.plan_s_jn + WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)), +issn AS ( + SELECT * + FROM ( + SELECT id, issn_printed as issn + FROM datasource + WHERE issn_printed IS NOT NULL + UNION + SELECT id,issn_online as issn + FROM datasource + WHERE issn_online IS NOT NULL ) as issn + WHERE LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa FROM publication_datasources pd LEFT OUTER JOIN ( -SELECT pd.id, 1 as is_hybrid_oa from publication_datasources pd -JOIN 
datasource d on d.id=pd.datasource -JOIN issn on issn.id=pd.datasource -JOIN hybrid_oa ON issn.issn = hybrid_oa.issn -JOIN indi_result_has_cc_licence cc on pd.id=cc.id -where cc.has_cc_license=1) tmp -on pd.id=tmp.id; - - + SELECT pd.id, 1 as is_hybrid_oa from publication_datasources pd + JOIN datasource d on d.id=pd.datasource + JOIN issn on issn.id=pd.datasource + JOIN hybrid_oa ON issn.issn = hybrid_oa.issn + JOIN indi_result_has_cc_licence cc on pd.id=cc.id + where cc.has_cc_license=1) tmp on pd.id=tmp.id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index e75bdffc8..afd333492 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -123,6 +123,10 @@ create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * fro compute stats TARGET.indi_pub_doi_from_crossref; create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_gold_oa; +create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_datasets_gold_oa; +create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_software_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from 
SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); @@ -148,6 +152,11 @@ compute stats TARGET.indi_pub_closed_other_open; create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_no_of_copies; +create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable; +create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; +create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_hybrid_oa_with_cc; + --- Usage statistics create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); From 87c91f70a26f73ea89a2878f13634f6699d0b945 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Fri, 18 Feb 2022 17:11:23 +0200 Subject: [PATCH 090/162] added sprint 6 indicators to monitor db --- .../scripts/step16-createIndicatorsTables.sql | 344 +++++++++--------- .../scripts/step20-createMonitorDB.sql | 9 + 2 files changed, 187 insertions(+), 166 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index e5bc588be..d925b3694 100755 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -31,17 +31,6 @@ join datasource d on d.id = ri.collectedfrom where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; -create table indi_pub_gold_oa stored as parquet as -select distinct p.id, coalesce(gold_oa, 0) as gold_oa -from publication p -left outer join ( -select p.id, 1 as gold_oa -from publication p -join result_instance ri on ri.id = p.id -join datasource on datasource.id = ri.hostedby -where datasource.id like '%doajarticles%') tmp -on p.id= tmp.id; - ---- Sprint 2 ---- create table indi_result_has_cc_licence stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -144,16 +133,6 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp on pd.id=tmp.id; -create table indi_is_gold_oa stored as parquet as -(select distinct pd.id, coalesce(gold_oa, 0) as gold_oa -from publication_datasources pd -left outer join ( -select pd.id, 1 as gold_oa from publication_datasources pd -join datasource d on d.id=pd.datasource -join stats_ext.plan_s_jn ps on (ps.issn_print=d.issn_printed or ps.issn_online=d.issn_online) -where ps.journal_is_in_doaj is true or ps.journal_is_oa is true) tmp -on pd.id=tmp.id); - create table indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative from publication pd @@ -180,180 +159,213 @@ create table indi_result_no_of_copies stored as parquet as select id, count(id) as number_of_copies from result_instance group by id; ---- Sprint 6 ---- -create table indi_pub_gold_oa_new stored as parquet as -WITH gold_oa AS (SELECT issn_l, 
journal_is_in_doaj,journal_is_oa, issn_1 as issn -FROM stats_ext.oa_journals -WHERE issn_1 != "" -UNION ALL -SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_2 as issn -FROM stats_ext.oa_journals -WHERE issn_2 != "" ), -issn AS (SELECT * FROM -(SELECT id, issn_printed as issn -FROM datasource WHERE issn_printed IS NOT NULL -UNION -SELECT id, issn_online as issn -FROM datasource WHERE issn_online IS NOT NULL) as issn -WHERE LENGTH(issn) > 7) +create table indi_pub_gold_oa stored as parquet as +WITH gold_oa AS ( + SELECT issn_l, journal_is_in_doaj,journal_is_oa, issn_1 as issn + FROM stats_ext.oa_journals + WHERE issn_1 != "" + UNION ALL + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_2 as issn + FROM stats_ext.oa_journals + WHERE issn_2 != "" ), +issn AS ( + SELECT * FROM + (SELECT id, issn_printed as issn + FROM datasource WHERE issn_printed IS NOT NULL + UNION + SELECT id, issn_online as issn + FROM datasource WHERE issn_online IS NOT NULL) as issn + WHERE LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold FROM publication_datasources pd -LEFT OUTER JOIN -(SELECT pd.id, 1 as is_gold FROM publication_datasources pd -JOIN issn on issn.id=pd.datasource -JOIN gold_oa on issn.issn = gold_oa.issn) tmp -ON pd.id=tmp.id; +LEFT OUTER JOIN ( + SELECT pd.id, 1 as is_gold FROM publication_datasources pd + JOIN issn on issn.id=pd.datasource + JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; -create table indi_datasets_gold_oa_new stored as parquet as -WITH gold_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn -FROM stats_ext.oa_journals -WHERE issn_1 != "" -UNION -ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn -FROM stats_ext.oa_journals -WHERE issn_2 != "" ), -issn AS (SELECT * -FROM (SELECT id,issn_printed as issn -FROM datasource WHERE issn_printed IS NOT NULL -UNION -SELECT id, issn_online as issn -FROM datasource -WHERE issn_online IS NOT NULL ) as issn -WHERE LENGTH(issn) > 7) 
+create table indi_datasets_gold_oa stored as parquet as +WITH gold_oa AS ( + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn + FROM stats_ext.oa_journals + WHERE issn_1 != "" + UNION + ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn + FROM stats_ext.oa_journals + WHERE issn_2 != "" ), +issn AS ( + SELECT * + FROM ( + SELECT id,issn_printed as issn + FROM datasource + WHERE issn_printed IS NOT NULL + UNION + SELECT id, issn_online as issn + FROM datasource + WHERE issn_online IS NOT NULL ) as issn + WHERE LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold FROM dataset_datasources pd -LEFT OUTER JOIN -(SELECT pd.id, 1 as is_gold FROM dataset_datasources pd -JOIN issn on issn.id=pd.datasource -JOIN gold_oa on issn.issn = gold_oa.issn) tmp -ON pd.id=tmp.id; +LEFT OUTER JOIN ( + SELECT pd.id, 1 as is_gold FROM dataset_datasources pd + JOIN issn on issn.id=pd.datasource + JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; -create table indi_software_gold_oa_new stored as parquet as -WITH gold_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn -FROM stats_ext.oa_journals -WHERE issn_1 != "" -UNION -ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn -FROM stats_ext.oa_journals -WHERE issn_2 != "" ), -issn AS (SELECT * -FROM (SELECT id,issn_printed as issn -FROM datasource WHERE issn_printed IS NOT NULL -UNION -SELECT id, issn_online as issn -FROM datasource -WHERE issn_online IS NOT NULL ) as issn -WHERE LENGTH(issn) > 7) +create table indi_software_gold_oa stored as parquet as +WITH gold_oa AS ( + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn + FROM stats_ext.oa_journals + WHERE issn_1 != "" + UNION + ALL SELECT issn_l,journal_is_in_doaj,journal_is_oa,issn_2 as issn + FROM stats_ext.oa_journals + WHERE issn_2 != "" ), +issn AS ( + SELECT * + FROM ( + SELECT id,issn_printed as issn + FROM datasource + WHERE issn_printed IS NOT NULL + UNION + SELECT 
id, issn_online as issn + FROM datasource + WHERE issn_online IS NOT NULL ) as issn + WHERE LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold FROM software_datasources pd -LEFT OUTER JOIN -(SELECT pd.id, 1 as is_gold FROM software_datasources pd -JOIN issn on issn.id=pd.datasource -JOIN gold_oa on issn.issn = gold_oa.issn) tmp -ON pd.id=tmp.id; +LEFT OUTER JOIN ( + SELECT pd.id, 1 as is_gold FROM software_datasources pd + JOIN issn on issn.id=pd.datasource + JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; create table indi_org_findable stored as parquet as -with result_with_pid as -(select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro -join result_pids rp on rp.id=ro.id -group by ro.organization), -result_has_abstract as -(select ro.organization organization, count(distinct rp.id) no_result_with_abstract from result_organization ro -join result rp on rp.id=ro.id where rp.abstract=true -group by ro.organization), -allresults as (select organization, count(distinct id) no_allresults from result_organization -group by organization), -result_with_pid_share as( -select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults pid_share -from allresults -join result_with_pid on result_with_pid.organization=allresults.organization), -result_with_abstract_share as( -select allresults.organization, result_has_abstract.no_result_with_abstract/allresults.no_allresults abstract_share -from allresults -join result_has_abstract on result_has_abstract.organization=allresults.organization) +with result_with_pid as ( + select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro + join result_pids rp on rp.id=ro.id + group by ro.organization), +result_has_abstract as ( + select ro.organization organization, count(distinct rp.id) no_result_with_abstract from result_organization ro + join result rp on rp.id=ro.id where 
rp.abstract=true + group by ro.organization), +allresults as ( + select organization, count(distinct id) no_allresults from result_organization + group by organization), +result_with_pid_share as ( + select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults pid_share + from allresults + join result_with_pid on result_with_pid.organization=allresults.organization), +result_with_abstract_share as ( + select allresults.organization, result_has_abstract.no_result_with_abstract/allresults.no_allresults abstract_share + from allresults + join result_has_abstract on result_has_abstract.organization=allresults.organization) select allresults.organization, coalesce((pid_share+abstract_share)/2,pid_share) org_findable from allresults join result_with_pid_share on result_with_pid_share.organization=allresults.organization left outer join ( -select organization, abstract_share from result_with_abstract_share -)tmp -on tmp.organization=allresults.organization; + select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization; create table indi_org_openess stored as parquet as WITH datasets_oa as ( -SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa_new dg -join openaire_prod_stats.result_organization ro on dg.id=ro.id -join openaire_prod_stats.dataset ds on dg.id=ds.id -WHERE dg.is_gold=1 -group by ro.organization), + SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa_new dg + join openaire_prod_stats.result_organization ro on dg.id=ro.id + join openaire_prod_stats.dataset ds on dg.id=ds.id + WHERE dg.is_gold=1 + group by ro.organization), software_oa as ( -SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa_new dg -join openaire_prod_stats.result_organization ro on dg.id=ro.id -join openaire_prod_stats.software ds on dg.id=ds.id -WHERE dg.is_gold=1 -group by ro.organization), + SELECT ro.organization, 
count(dg.id) no_oasoftware FROM indi_software_gold_oa_new dg + join openaire_prod_stats.result_organization ro on dg.id=ro.id + join openaire_prod_stats.software ds on dg.id=ds.id + WHERE dg.is_gold=1 + group by ro.organization), pubs_oa as ( -SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa_new dg -join openaire_prod_stats.result_organization ro on dg.id=ro.id -join openaire_prod_stats.publication ds on dg.id=ds.id -where dg.is_gold=1 -group by ro.organization), + SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa_new dg + join openaire_prod_stats.result_organization ro on dg.id=ro.id + join openaire_prod_stats.publication ds on dg.id=ds.id + where dg.is_gold=1 + group by ro.organization), allpubs as ( -SELECT ro.organization organization, count(ro.id) no_allpubs FROM result_organization ro -join openaire_prod_stats.publication ps on ps.id=ro.id -group by ro.organization), + SELECT ro.organization organization, count(ro.id) no_allpubs FROM result_organization ro + join openaire_prod_stats.publication ps on ps.id=ro.id + group by ro.organization), alldatasets as ( -SELECT ro.organization organization, count(ro.id) no_alldatasets FROM result_organization ro -join openaire_prod_stats.dataset ps on ps.id=ro.id -group by ro.organization), + SELECT ro.organization organization, count(ro.id) no_alldatasets FROM result_organization ro + join openaire_prod_stats.dataset ps on ps.id=ro.id + group by ro.organization), allsoftware as ( -SELECT ro.organization organization, count(ro.id) no_allsoftware FROM result_organization ro -join openaire_prod_stats.software ps on ps.id=ro.id -group by ro.organization), + SELECT ro.organization organization, count(ro.id) no_allsoftware FROM result_organization ro + join openaire_prod_stats.software ps on ps.id=ro.id + group by ro.organization), allpubsshare as ( -select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs -join pubs_oa on 
allpubs.organization=pubs_oa.organization), + select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.organization=pubs_oa.organization), alldatasetssshare as ( -select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets c -from alldatasets -join datasets_oa on alldatasets.organization=datasets_oa.organization), + select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets c + from alldatasets + join datasets_oa on alldatasets.organization=datasets_oa.organization), allsoftwaresshare as ( -select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s -from allsoftware -join software_oa on allsoftware.organization=software_oa.organization) -select allpubsshare.organization, coalesce((c+p+s)/3, p) org_openess FROM allpubsshare -left outer join (select organization,c from -alldatasetssshare) tmp -on tmp.organization=allpubsshare.organization -left outer join (select organization,s from -allsoftwaresshare) tmp1 -on tmp1.organization=allpubsshare.organization; + select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s + from allsoftware + join software_oa on allsoftware.organization=software_oa.organization) +select allpubsshare.organization, coalesce((c+p+s)/3, p) org_openess +FROM allpubsshare +left outer join ( + select organization,c from + alldatasetssshare) tmp on tmp.organization=allpubsshare.organization +left outer join ( + select organization,s from allsoftwaresshare) tmp1 on tmp1.organization=allpubsshare.organization; create table indi_pub_hybrid_oa_with_cc stored as parquet as -WITH hybrid_oa AS (SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn -FROM stats_ext.plan_s_jn -WHERE issn_print != "" -UNION ALL -SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn -FROM stats_ext.plan_s_jn -WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = 
FALSE)), -issn AS (SELECT * FROM -(SELECT id, issn_printed as issn -FROM datasource -WHERE issn_printed IS NOT NULL -UNION -SELECT id,issn_online as issn -FROM datasource -WHERE issn_online IS NOT NULL ) as issn -WHERE LENGTH(issn) > 7) +WITH hybrid_oa AS ( + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn + FROM stats_ext.plan_s_jn + WHERE issn_print != "" + UNION ALL + SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn + FROM stats_ext.plan_s_jn + WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)), +issn AS ( + SELECT * + FROM ( + SELECT id, issn_printed as issn + FROM datasource + WHERE issn_printed IS NOT NULL + UNION + SELECT id,issn_online as issn + FROM datasource + WHERE issn_online IS NOT NULL ) as issn + WHERE LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa FROM publication_datasources pd LEFT OUTER JOIN ( -SELECT pd.id, 1 as is_hybrid_oa from publication_datasources pd -JOIN datasource d on d.id=pd.datasource -JOIN issn on issn.id=pd.datasource -JOIN hybrid_oa ON issn.issn = hybrid_oa.issn -JOIN indi_result_has_cc_licence cc on pd.id=cc.id -where cc.has_cc_license=1) tmp -on pd.id=tmp.id; + SELECT pd.id, 1 as is_hybrid_oa from publication_datasources pd + JOIN datasource d on d.id=pd.datasource + JOIN issn on issn.id=pd.datasource + JOIN hybrid_oa ON issn.issn = hybrid_oa.issn + JOIN indi_result_has_cc_licence cc on pd.id=cc.id + where cc.has_cc_license=1) tmp on pd.id=tmp.id; +create table indi_pub_downloads stored as parquet as +SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats +join publication on result_id=id +where downloads>0 +GROUP BY result_id +order by no_dowloads desc; +create table indi_pub_downloads_datasource stored as parquet as +SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats +join publication on result_id=id +where downloads>0 +GROUP BY 
result_id, repository_id +order by result_id; + +create table indi_pub_downloads_year stored as parquet as +SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us +join publication on result_id=id where downloads>0 +GROUP BY result_id, `year` +order by `year` asc; + +create table indi_pub_downloads_datasource_year stored as parquet as +SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us +join publication on result_id=id +where downloads>0 +GROUP BY result_id, repository_id, `year` +order by `year` asc, result_id; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index e75bdffc8..afd333492 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -123,6 +123,10 @@ create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * fro compute stats TARGET.indi_pub_doi_from_crossref; create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_gold_oa; +create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_datasets_gold_oa; +create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); 
+compute stats TARGET.indi_software_gold_oa; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_has_abstract; create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); @@ -148,6 +152,11 @@ compute stats TARGET.indi_pub_closed_other_open; create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_result_no_of_copies; +create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable; +create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; +create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_hybrid_oa_with_cc; + --- Usage statistics create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); From 3b92a2ab9ce28af7bab0ea0e57e7841d0d29c2cf Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Wed, 23 Feb 2022 12:05:57 +0200 Subject: [PATCH 091/162] added the rest of spring 6 in monitor db --- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index afd333492..b4964d2b3 100644 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -157,8 +157,14 @@ create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_hybrid_oa_with_cc; ---- Usage statistics -create table TARGET.usage_stats stored as parquet as select * from SOURCE.usage_stats orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_downloads; +create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_downloads_datasource; +create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_downloads_year; +create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_downloads_datasource_year; --denorm alter table TARGET.result rename to TARGET.res_tmp; From 9d6203f79b6a9d16764ec9cc68c287b8da7a1fde Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 23 Feb 2022 15:00:53 +0100 Subject: [PATCH 092/162] test mapping datasource --- 
.../oa/provision/XmlRecordFactoryTest.java | 30 +++++++++++++++++++ .../dnetlib/dhp/oa/provision/datasource.json | 1 + 2 files changed, 31 insertions(+) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 2b5e08e92..501784872 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -8,6 +8,7 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; +import eu.dnetlib.dhp.schema.oaf.Datasource; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -129,4 +130,33 @@ public class XmlRecordFactoryTest { System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); } + + @Test + public void testDatasource() throws IOException, DocumentException { + final ContextMapper contextMapper = new ContextMapper(); + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); + + final Datasource d = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("datasource.json")), Datasource.class); + + final String xml = xmlRecordFactory.build(new JoinedEntity<>(d)); + + assertNotNull(xml); + + final Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + + System.out.println(doc.asXML()); + + // TODO add assertions based of values extracted from the XML record + + assertEquals("National", doc.valueOf("//jurisdiction/@classname")); + assertEquals("true", doc.valueOf("//thematic")); + assertEquals("Journal article", 
doc.valueOf("//contentpolicy/@classname")); + assertEquals("Journal archive", doc.valueOf("//datasourcetypeui/@classname")); + + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json new file mode 100644 index 000000000..ae069b8b5 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|openaire____::13068d7823ea0bd86516ac2cb66e96ba","value":"Jurnal Fakultas Sastra Universitas Ekasakti","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1645012035118,"id":"10|274269ac6f3b::2a2e2793b500f3f7b47ef24b1a9277b7","originalId":["274269ac6f3b::2579-5449","piwik:13"],"pid":[],"dateofcollection":"2020-01-21","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"datasourcetype":{"classid":"pubsrepository::journal","classname":"pubsrepository::journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"pubsrepository::journal","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"openairecompatibility":{"classid":"hostedBy","classname":"hostedBy","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"officialname":{"value":"Jurnal Ilmiah Pendidikan 
Scholastic","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"englishname":{"value":"Jurnal Ilmiah Pendidikan Scholastic","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"websiteurl":{"value":"http://e-journal.sastra-unes.com/index.php/JIPS/index","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"logourl":null,"contactemail":{"value":"test@test.it","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"namespaceprefix":{"value":"ojs_25795449","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"latitude":{"value":"0.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityreg
istry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"longitude":{"value":"0.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"dateofvalidation":null,"description":null,"subjects":[],"odnumberofitems":{"value":"0.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"odnumberofitemsdate":null,"odpolicies":null,"odlanguages":[],"odcontenttypes":[{"value":"Journal articles","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"accessinfopackage":[],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"dataprovider":{"value":false,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"serviceprovider":{"value":false,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid
":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"databaseaccesstype":null,"datauploadtype":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":{"value":false,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"citationguidelineurl":null,"qualitymanagementkind":null,"pidsystems":null,"certificates":null,"policies":[],"journal":{"name":"Jurnal Ilmiah Pendidikan Scholastic","issnPrinted":"2579-5449","issnOnline":"2579-5448","issnLinking":"2579-5447","ep":null,"iss":null,"sp":null,"vol":null,"edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"providedentitytypes":null,"providedproducttypes":null,"jurisdiction":{"classid":"National","classname":"National","schemeid":"eosc:jurisdictions","schemename":"eosc:jurisdictions"},"thematic":true,"knowledgegraph":true,"contentpolicies":[{"classid":"Journal article","classname":"Journal article","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}]} From 86cdb7a38f52209398db510455488c4331d4db08 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 23 Feb 2022 15:54:18 +0100 Subject: [PATCH 093/162] [provision] serialize measures defined on the result level --- .../oa/provision/utils/XmlRecordFactory.java | 23 +++++++- .../utils/XmlSerializationUtils.java | 14 +++++ .../oa/provision/XmlRecordFactoryTest.java | 7 +++ .../dnetlib/dhp/oa/provision/publication.json | 58 +++++++++++++++++++ 4 files 
changed, 101 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index e51a84e02..cb29dc121 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -23,7 +23,6 @@ import javax.xml.transform.stream.StreamResult; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.apache.http.protocol.HTTP; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -50,6 +49,7 @@ import eu.dnetlib.dhp.schema.common.*; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import scala.Tuple2; public class XmlRecordFactory implements Serializable { @@ -209,6 +209,10 @@ public class XmlRecordFactory implements Serializable { if (ModelSupport.isResult(type)) { final Result r = (Result) entity; + if (r.getMeasures() != null) { + metadata.addAll(measuresAsXml(r.getMeasures())); + } + if (r.getContext() != null) { contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList())); /* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */ @@ -936,6 +940,23 @@ public class XmlRecordFactory implements Serializable { return metadata; } + private List measuresAsXml(List measures) { + return measures + .stream() + .flatMap( + m -> m + .getUnit() + .stream() + .map( + u -> Lists + .newArrayList( + new Tuple2<>("id", m.getId()), + new Tuple2<>("key", u.getKey()), + new Tuple2<>("value", u.getValue()))) + .map(l -> XmlSerializationUtils.asXmlElement("measure", l))) + 
.collect(Collectors.toList()); + } + private String getAuthorPidType(final String s) { return XmlSerializationUtils .escapeXml(s) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index 213a62b32..73667e056 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -5,7 +5,10 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix; import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; +import java.util.List; + import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; public class XmlSerializationUtils { @@ -147,4 +150,15 @@ public class XmlSerializationUtils { .append(attr("schemename", q.getSchemename())) .toString(); } + + public static String asXmlElement(String name, List> attributes) { + StringBuilder sb = new StringBuilder(); + sb.append("<"); + sb.append(name); + for (Tuple2 attr : attributes) { + sb.append(" ").append(attr(attr._1(), attr._2())); + } + sb.append("/>"); + return sb.toString(); + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index c1e0567e6..1df980643 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -69,6 +69,13 @@ public class XmlRecordFactoryTest { assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()")); assertEquals("EUR", 
doc.valueOf("//processingchargecurrency/text()")); + + assertEquals( + "1.00889953098e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@value")); + assertEquals( + "30.6576853333", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@value")); + assertEquals( + "4.62970429725e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@value")); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index 3b5b54bbf..d0c8ea4ed 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -1,4 +1,33 @@ { + "measures": [ + { + "id": "influence", + "unit": [ + { + "key": "score", + "value": "1.00889953098e-08" + } + ] + }, + { + "id": "popularity_alt", + "unit": [ + { + "key": "score", + "value": "30.6576853333" + } + ] + }, + { + "id": "popularity", + "unit": [ + { + "key": "score", + "value": "4.62970429725e-08" + } + ] + } + ], "author": [ { "affiliation": [], @@ -284,6 +313,35 @@ "id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c", "instance": [ { + "measures": [ + { + "id": "influence", + "unit": [ + { + "key": "score", + "value": "1.00889953098e-08" + } + ] + }, + { + "id": "popularity_alt", + "unit": [ + { + "key": "score", + "value": "30.6576853333" + } + ] + }, + { + "id": "popularity", + "unit": [ + { + "key": "score", + "value": "4.62970429725e-08" + } + ] + } + ], "pid": [ { "dataInfo": { From ab36154e3e6d6799fb14fc9de860f09c7178c1dd Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 24 Feb 2022 12:16:17 +0100 Subject: [PATCH 094/162] added more ignores --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 
f4fb46f2e..0a50a4947 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,4 @@ spark-warehouse /**/job-override.properties /**/*.log /**/.factorypath - +/**/.scalafmt.conf From 66c09b1bc73c73f5421efdc79237a82f8caf8323 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 24 Feb 2022 12:58:07 +0100 Subject: [PATCH 095/162] code formatting --- .../eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 2142737f0..273132bff 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -8,7 +8,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -import eu.dnetlib.dhp.schema.oaf.Datasource; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -24,6 +23,7 @@ import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -146,10 +146,10 @@ public class XmlRecordFactoryTest { final ContextMapper contextMapper = new ContextMapper(); final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, - XmlConverterJob.schemaLocation); + XmlConverterJob.schemaLocation); final Datasource d = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream("datasource.json")), 
Datasource.class); + .readValue(IOUtils.toString(getClass().getResourceAsStream("datasource.json")), Datasource.class); final String xml = xmlRecordFactory.build(new JoinedEntity<>(d)); From a40b59b7d55f358232ae6e7432bc67db9bfbb269 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 24 Feb 2022 16:05:57 +0100 Subject: [PATCH 096/162] [ResultToOrgFromInstRepoTest] fixed issue in model of the input resources --- .../alreadyLinked/alreadyLinked_20.json.gz | Bin 3160 -> 3139 bytes .../alreadyLinked/alreadyLinked_20.json.gz | Bin 3175 -> 3154 bytes .../alreadyLinked/alreadyLinked_20.json.gz | Bin 3160 -> 3139 bytes 3 files changed, 0 insertions(+), 0 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz index ee822e372a4a590bd303308b2cc6fd849bf1bbfd..0c12568e0a5236bbc1c5bd5cb549e0cccda4bbd8 100644 GIT binary patch literal 3139 zcmV-J47~FniwFpIofl#N17U1(WnpA_OlfXwWn^D6FfM9yZ*BnHnage-wh@K*eGIR= zAZK{>Yjjx@#tbiQnu`P5X;376_kOhE*pdk;=(65evc;d|%$akcetGrn`uD5XuX6a+ z#`kYN{dxUbqiWGyL#?e%i7oh8JMBl!e z?>8aD*RQjA?t5ELL!-qMX0lvH?rB+WCBORHfBy8#=gr64ckA=!K|`}zcG=BGnqITD zSZ7wNs}@UNZ~pU#H(wmys?-uV|FG8bD!z6sxrJ0~SRtIlv*w3y{_(|{Jn9auk65c) z>bpx&-Bp&@=CAay=J6#*Yrb1s!O~nNHJUEjO=b2hqV4%hrqOom#n$dCr&j6~-$}Hr zK3WW4U-JDA@Av2RG4hwia#84|0n)WRIM1BX&IdT!PFvXWB_YQ@9y;i75p zTVY17A|6ywx0;3b99a?iV^5JEP&?lY$*X|*Q` zA~BizkYTgTNpalD!NHJn7y$8Li9$HZflGprQSLtX>o2_ zWw=X-Go~iG!|gs^ZOiU`#RZH&K`Mn_r2scb)pXmZ<~nj(sxz%Fc{Ijr%^g$*yN+>N z6KAdl+IDmh;FX(14SA26>gYDF`*8d=LfJKlw^n0M&BoLkCL{IPkb@K-k;4S$JcsFxzAg*v zwkD3r2TmzaE@zURc$eaf&{+0Be9gJ&w_D_X){-wj?E0pFV14 zT-S0)EGFRaX?{M#;NT4?HeU**de~vsT+{(>9W(AyuJbd1Vw^U2Txs|gSSL28c6;6WhqihR| z^ib-h42G6gdQEz(+k$7rSLv!*56nU-drril$hqCEZdFWyFyjm_Y}PP=>CEi130L$R 
zzb&v_nP&5Jz*AMbNfNk#(OBWPHQ#?T-o1VQc7J%lL1e5oJFrO~yA$?GaMYlJKBnC6 zgJi7EXZ!A;I5U8XNTYo@r>@g(wN~Ug)>GS56XI2n*SYZrs25$XHx7xLuB4<|Szv6I zw`xoDH0-)P-u9t2z!RdD2Fl=q8Ry`@eZZ{n3V^ zl20lEwoB<(ZOj6!BxV%yv<5mDbw&wLft9-Y^I~2&ROhFAvtn6R!?MW1tWOn_m+TVd z=nZOV3)vbYM3`TuCn&~e2zF~)?w$d+%bY8>c`OcGvocJsW!%@SmOKgqSnrc zDPF`-Khs-bAj8-uelGM)tI~Evc?=|Uq}pv?&!&hs4a+5-DaXO}UD6O#nwhNEK}7g+ z9vcgWOA$VN4YXk6+jdG_+QXW^zkm1Pi#0iW=T00X<%Rf#BQ&EkRSQA9y0%p#MMhMq zaieOKURy)X@wG9166VK)*gW7A5b_}0y~|7^WKf3wp=$lQj}9g=$XG|g8?=g}fKjG@v3)q(`2>{=S6YKwbs<83vL*q zoG3uwHLi%?n7w(L^IGdV6#u_!ZrFu_(u|dulCx#f)#OHc{|gRqVhYtIS|2S1E-x6z zhQVbR(Vgc9v9;I(D56URo7HA_1RrV{swaiTM~r6bNMA)+E7z`CbTM!s#y!g8@sLVoK9WyETod zhmggZ8hyjW;>aa1fOYJw@rAsHd6;;W-nr4?n6}Yh9M8Z2Ra-AMZT5}REke|ci7t_% zyiWseXw)d_`OQls&&a~puTB=K1DRy>Hi1ZB)|bL2rjY{gQ2WSxKlt1-HcmG;2>tF; z7>j-4R2XZ;Y_r!a3^6JK4QY7cz;@zDASIvZGSBy+cVRvfmSp~4q~pkEV?0V}6ZT9I zKL=(p39^;Ae1XIhH!Kj$P99ZKUqr*+@Mr1x0-&6KbI|s!eR;`H*i#5!jB}wR38x9S zK|xEHf$OZ`eda!y5MPQ*=+TILiaefw8P^m+$#PW6-^YgMBZ9BW@B4vGV3G&lg14QOI_pS)Vb&l7}KE zJZHn~`OVP@C-nC*Ar`?KMxd5(ihEk@@g=yDNH~3=rYF=9q*6y0caI4E%%3{>gef6m zD*}Y@LCzP1lY})-0TQ>-ZZcYQ!BxmCAq_U_d*})X8>ELbZ1W~>djN3Ck2*)Z7eb?5 ziM$puV9BppsC;zRc*|UgtHDsEN#w+wFF-ffv&9tVbOf5`fVgtpi0F?dG^LXJZ=HN= z|Fm}>z8T-G_rM6$K$AGqqvjPaNF1*mXyhJy)ti}4JBIBg4t5;-kPJUtwMREIi0t_T z7qlKH3>9L?iGUqrM{*`#IKaJ-)#GYyk}QS>6P1;i>T}NAEisXu&uPE!NsXeBRSNXw9UN}Ys|BP~fnvTgWuF1*AYIco2npkk zIn1>|dG$pGlas}?r^smHCMRSVpdm6zJWl>1(oZ%oHZ6w+j3&`6uFSmcPJHEx)a-WP z$*F3>M-;e^45-j zCnkikc3<bU#lYHD6c5~S-6-gioAe0y4dGaExy@M}lgD_1HC*hPcpG`S!T>kv#jP2gAKJG*` zC)!?GOuNv?5mS3fLPjCttR7lso+qlc(tw67QNjQbUXyZleDc<*Tj?Zb3x{>ui_a<) z?v{|q$dNfrItfUbXm#F(9AAh~f%2=VkCnkt(vhJEn^w@+kFUyO;_N;>EkE7|XR|+U z6-cmg5JmGo@7~14RxsZ;7SBfjXGOwEzXWpEZH)8l1gZ|H#Ur(;GQ#-ZrHrC&nsE3 z-8Jq8LT=8NyhY(K6!^M|*FLXcxE=`^@sI5~AlPy~-VZPsssU#$6@`x`UKw+6eW(9J z_j%&wC@&s6t%O4%k7tJ{F>#esLMqQc2M`FE)+)Aebqkgx4+adoVT1XK8xD}I-PUw(O#465(%wZMdPPNF#?$^irMbI21?FflOuG%XU zf?e+^%s^$Yd~Q@d3bB=+%ScEmwXZg|GEQ2qzmUl&dqQo-7Q 
zxqA8dJ4Po=uQ=9u&(cCCkj4;ij>c-pllE40+NVf9-Ypq0Ott56Wr06-pU02gS*yx< dsr3{1$Lbd;910 z@zv|Uypr(CT=T~tzWHf?6GD9bI-BRdxAinMT1;Uk%Vp%AmgQFRtH1v14?n*+$awdD zeK|?MQcK`U!&=L$_}a1L7E-NYg>de} znzwKM_PaHC)E!zMu~xU#cbA~Lt1Pk2Z|P~x^9zsGe7ClOrMXOMG+nZr%IsN0+XEL) zqwUs3j6;SX;=>@Ob^Q!gGpZHkt2=pM_KA+h>OEzEH{d?;0; z=T@yRD>=oWRy-^pE}Hhf6=~!u;(;}JffmcbtnI#K`K~R`r!~*dR^tpnB6*1poKh$) z_qrh#CJ6n%2CzZHsc9Rk$u5GdHx)ygz@oCL-;A;y}dyT`wV8Nb+nH&os zL~k_dw#Fz@>a8=X7U#BAhP#9~V``#1-0tJmw(Qln8+apr2EZAS+IUb#uskoTymj&Ae1565pKlwE^(Yc=-NY)q|T zGHIXj_JX<8T49MwZFMBqQHt)_!&<$1thaqys%l-j*RCs$0$JYvP!E;FJO*b0*n2u5QaUt;yBOeT^KZn^STYr|4q~um7!=GbuEX`Vge4I=jRIy4&H!b^QF+MhaG0kMIGSQG2<@fIzIy_ z#%XiMm4%R>c$u zGtThBW(^aV&de^Ga7DlI+XBm#X*N#>JXN)uB!LSUjTL@f^ZlFg{@sUn`{NT1B4e%D zfld0GeqXrf9G39n2Bx7|x+jj@WnE_P98y(C!b)9yrwW8Rup4z6G(6EBk&W(LQ z#OQLpaY)>BB_-9$0%NnhRa>H`Vb|^PwhyfVo)EP(PzD#wI0pyr17?LMpVqwlVZEL2 z?ETX|rD@)%+D?qoV(4;6KB)-UE~Q_!F$=Jgm{G{n8t7ou86`jkR_f{x2z%jVoh7|l zu`H`$W#j8(f*V{8-87y71cX*&Wv268%5?Y6IHQ-qx6@t{W&;vu+TI>NR(W-*)YO}lXaXZch*!97olvax1u@)5H z++l3#5rOFF>>RbGaI4H#MS4J5B4A)xrx(RK z)hk0bo!kyBS&nhi%7qhVk$W~(a*J1O*RSV>_lSh@*7ZI~Jd|o>K0_f*oow$8x zzrS!OY%PR1#=lUWgkyylp|2&(%yqW#P;;MbiND2d^k_s!MOx3ljB6U9d^swu=wqW1 z5woBQSpXu#Cx@%!AL=I((ZmL!k}FY zYN?~OyGKmK%%3~_f^i{pD+0psR?Z&?KMCuf0%BmJ-K4qbg7c7WLLh|J_s|s(OUM)F z+2&0$_h1NU3yL4{bLf_KB~n|&)Fr=Wt@6pa<6Uzl4hKWzCXp8NzX0Q0|GlunK#xFE z9fViT9B~t)30APc(1_#U@V@tQ>c%Hc-tu~)qr_q1c! 
zUgBWmu@5)#vsHU^GlK*lIDElS;JV>b5&)UA;~t6X1R4k7UnjcfRoo-|_=;genQguv?q*PmqU^X~xnXPE0MH3-Yj?7$(@jea$lp_eV0&i*}Q8_*1PX7LDz`Nl5#*@cm-V z;wSOrr}8ZhVc3b3q9j`1l;x3T?8uBU`NtvRdzbA}(HG(g3jB4_dhv3ty@NYxgV4XH zkiN>9=cb%IE`Q)4%=+F)2zE#`C*V+8OuNw85mS3mmPR3ptsYv21}3Vw(tz46QQ82Z zUK4$FJp9(FTj?-f4}*8wj?F6+^p@nv@{y5EItg-_2zcJJ9B+#FF$vee5i7%^q&P!m zHZ7vzpI*Yp#8`fQ4u8H6E^2?;Dv-3}ATsBD-s*|9tq=@uYLJhp&Wc4KrN`Jvqk7OK zz&8JunqSCYDjVs}8N(+IBP3j|c8se=mzJ+D6c6q5d@}3^8{G|(q%cg_*@V853L)9J zuF{-gMKI8a9p}M824NJ#g1QgXqSR|3biCG@{XBd?=Aog^3<`Vp^I-cxZXx87{sVI~ zT4)IPPDhsfydxs+>rp4{30sSuoy`{{Cq#GX3UN0)xwo5tL3hp<1>WIeVx=r1dzNe& zElMS{+Z!R?QZVls2{4>x+2@rn*X|m311&elO(dj6_IxM6D zKHd*V7_b4aEfs~cCLSHbas8g*q5Hh>ic}eIpH{;4%z@58rkKRaDIuE&o-jzPjBgcN zSjPpO5?BLi!Ejgui}2M3J}=Th`OSiL+%0hotV`S^YTU9=gm5eD?33Ulp@>!75V^-H z;epDL>D{kiL@cuK=u0D{4-advPz!0Hr!WImzVg4(fhZ(vel8<9rPRLK*vgn`xqi8n zaRmv*_Hc@6P~M36TX-Kwt*zg{g^law)AyxL=wETH^B$*#t{|l$-kg!ukSDFK$h%LG yeEeWCAem~<P^y)YL5 diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz index d9b92debaa9c5abd597b9a2c8fb836b3e3ba3608..0f8d6fdebfedd1293d06df897ccd37420773c450 100644 GIT binary patch literal 3154 zcmV-Y46XAYiwFqPofl#N17U1(WnpA_OlfXwWn^D6FfM9yZ*Bn9nah$LHW7yRJcidS z)Kcpn=olli+w`XnuP0t^M=O!ICBOO8zkm1Bf7^VPI#-umLh=>FRI|sKTWfRFDDCUbfBg9N z%Rdhfe5kUvq%~U0R;Ia>+=A8`EQG~+)_nKPUw*YFuj--oC0uoA_Z~w^UZcg~{GpF) zo}P1YAaBjW&`r6jjAYSUio53+Q+syFcl&WO$t^xS6sx1pD3saEHJ6%N51QC7X;l6C zk{`bN;J+MLw3jJUed*3Q^eM&Tic+U(3wzxT9N0uCBs<$EStX=VW;lI~N!@RSS-HkI z18cI?z`w&7&AUgNUR&OeYo4C1$_}(%XOSuV5K7BEd(#j?=EApq_U78Hb{*zXbfmfB z8oTVBmA;q9HSgCC-~O^j^B7z?G|TLjL)v0RWB{=mS4g*gOtx0vv(9;{V76NvJ#)>r zi}i4;o25xZL)g(bWoj*HZJxnZl4x$X8Z5rDq!BZfENv83c&e zJVz`ciusV{)r>o-Jg#{P_SAx;US*B2Sm3)bd&>ZV%tn)LYX(Kq-Z~>WWc#qP=8QqM 
zXp70i?LM;(kKVh90V7bLrO+!4(n73BZ~N3-S5`}DtM!;yW4vs8NO7_2TDLW_a~+_~ zWrBcNxrwBZeO0ZikA2;T!0!VjG@wKg(WH0R+rASN=d$Z zFza56!AbDFL?3~XUWs}o z1|yYLdewBRo53@2TIng}bYK=r@x2j)63^{!b*phCAj~+!3(guQP|wWT*lUKB7jrfYGq<^P0bZv);e^@XkLz;y@Vdlqay!Pu&S_3P&v}IMVu&pq6IH*243^{Bci(V&)EE+?tmA%z%5wy)CzWEDl^_875C<-Pc&t849B=GZt4# zwKHNVC{AgYk#2>745e-STy{kl(DOk#-3j)FI66-UYa zk-dyFs&=beETiwDw0t%b%+i_#p8^VMqJDlQ3C4ByiJe@#&YtE-I))FN7sV;pT2q@I zal;VhMgh84F~M)_-lpz-t#w@~bhDZpcA=m&%akcOTWYcvZ>0CX-~cCm zM~q{`;4+Np&dYxPD zM&k01hm0n?hdm**s3<-N~Q=&gj#;#7~99)$r@IZhE8L*ktHhD9i^HEnmt z>6S>;6s1e9arRBRLX~_KeGxHI8hIufzTP@ns4irZ(IbUT>#X2?wx3LhFU2Kv8AQH>Jf8out|@|&Wl5C3FNfy^!B2_d z2P^}B8?H`$rFo(ZjcX7@HunN#L&NKxr<#|GNm_U8iIaUNh$&c$j8hpMD?jF|q=?%_ z7!a`&&H5IBB@abzcy6pQUfvv?n1uemHpCL~h62ELbZ2Kl} zz5qDoN1X-lh0vs)h`d8EU^=f^sC;tPc*|Ugt6`u@o5+bdH$b=T#bQS0bOo9>0deKH zg6OXnX-Xyc-#Ynj{B^wl_|5v8eE>$F2Aaf?9yPCcLE?DjKr46YHQmg#x&)s^7CSCK zB*SNG>N3X|5ZTKIE@(YY7%Ie&69GHMj^s?fm;m=eR!^(tBv}j%CMqXl>fC$hK9q^< z9CxN$Cuay9-iVO+z-UC(+El=6CIfhR?Z6G;is*@>VhuLHMHv|fpSVJtUmy6HQ#?;r z=UD+C6_D#viPB4QTSp4#HRc6K0|PMe4h^>*h5OFXu=jP0@>+#)?gmunrF4MIZSF^9P}D6hWCVDV(J)KwTw+~kG~12jY?k>lhqBK>UhGWN)!0i#Vc z6U)q-cj7Boq-M7RZ%$PeA5q{wlJi7*2-NrkJr5*T|LzP0D9a>30$E0JI))^L7(9ZZ zaP_-JxK*ZbWB{;-D-*(4&uRO>{QeLHJkeHC1wYk#V#xA2okZea0`9Xl#wRhsr`k4<1z7U}THSD-urnCD4=d(m_!Kp7~1}eI`{} z9GT4-_Z278uGq}fF_1}SZ26k}f73osCtw^wqOBonEDQ~{Qqe+^7o;h6mC+2<(fo$Z zKS4e&k{+%CC0;Oosn^hTeye1=p9clVIW**yfnd*m9zS2s34})ibzoFh3k|V#J*qO# za~wi!uR7AH*h_5XZqs0{NDPLG5aNL!p6%u@(2n!aK0Dk>T#H8h%`uORo@fc}_Qr%TZoDwwmHl$m4lJl$f~6DIt}Yp92VlOlyt9aCHVtk_Q8Z zePDzZ(*AYRo)%`I*kgpzah${-FdfA^et(b1#lx+bgmgkj3@6RvT*w@wV%(_~`FPIj zgcZO5I z0kti`b3*30+|>FFP*|>BKK+hy5~fX-o%gI%=mgRj)jC@#HrI5YBggdSw{M<4{%L%8|Bvzc z{nLlv+vhiL|L`Wh{+K*w?(U=ON-{*l){)D~U2Wu6@|!>Y`#1mmuLqf>&ebKCkbDI( z)$DQR*4i93O8e$P{{H#hPyf7q%kNe`qNFui%T}hjl-z>W8Z3mx$FSzZ`#=9;Or1%((C$5kl)OfZ!}&v>*StLL;#}UEg`t}YRvF2nw-k5JF{XCA@Y8*d-kRw zgv`Bf`|Qo_TkSf`qv%L;#Wi->J1bBq&uc!eAHMr}jpi}9a%h&>D~GhjipT(ZHLj3u z`VJ4RW1euK{ 
z-PR0>q`h@Ub;$N%Wz89bY|$2zhueK-9Ui@R69Yz|Kue)l8l;6-liv2Jxvs31(pKv+ zug0j^_K@OY*R^hIWam0So67_NvvLzjA^WOYS0DSj565pS6t5x9)~a-EW9?MK_SV1R z?GS3F3W zOE^rZ9&^~U(N`P6Zfj(1K5)lk)QUeni}hhMsco)S?rY*c<5m*ECJ!P$a zyHC(Dchgw3&OKMnjH}s%&_sd5*ZKJw1_y6Iu}w?R>kJ-t%_U8MTh~TO%5{DgP?SBk zL+>a|gmt7rwa%s5NOHSRs!~iAOfc(0S)>S^T7A-L4m)rAFw_dA)q?s^fw88*%e{lw zF>1PsK!TIteu+K;BfS#!N(@FSt@NttRyTuZ;!X;m0+9d%r$DeR}fG&o~gqI^_v$ z^hL29>KkiHsS zt~U-5rz@%{RTd03%brqGdK$d?l-oYE26#f$v4JwUV9Q<{xG$I$o_t>O^u2x9PvhhB zKBcAXsG2KdWDH#nIyV&o+oklYHs%PdqGS~Ev<5m@bxRCT!6?=GP1x7dNBhMz6;9S! z(%UF_C~JANcz`=M>K-4oRFtMSXr>twwy;E~K-^QIo4G?6w+7}uGeDnlZ_8~T>jUpt zhSgJ9_chjZhQip(jKx(_?TlLrii6r^q+5|7Mrj+*7rN6n^@%{Qg`6&}efafk2_dI> zxyYILSuu=7S46Ry&w3q7L@?*&STh_;K|`1u!Wa-N+#aS+Xh z5QZZk1jfp@k@1 zzwVP3QyJp2qvj2Y#c6VXBrxNWs@>`q%jml(EuYN<?p6D(>9_^_GK8=-?}9>L0x*Sirs4>~b-+>pw~5)RNDoL;0tSY4 z&yuiC^~#VfZElB_*u*%c%7rJE#+vaLa?NcGW}@@I+h)O7#)Tj(W@Qm$Ad`JtqY+~^ z-4I440ayx)Vbi3dx<_fZ1`-VsIH}s`A?6mR9s|Qzm**t+eE2mky_3-SgjA2w9)*ch zIW`hrL;swdhkqy!HgI>x4VOqv6{TaYarR9RL%Dnv{dFWsX#}5W1OV%Vrn=Bm=91Lr z5!!t)sA4iHyocIXc7OK8WjW3`H%R~PYZx6r@hdDd$z%7LBZH3MKvgL`a`asdv>i_n4j_S7wZd+uMk>IgZxPDxR_5ixti z=jrhmKtKQJ#K(8zmlqC&ts^0h@h_A|ajftn^ktEmxy}|IYWvBS_*>jYmqBz)NbC78 z>zYO=UzS8G`f@ZvFbgR`7Jz8rlf%{VuQX32qKOSc$>v@_bZB_S^CI*>VTt~Z&2fVC z1W5&Nl5sqvlMBXtlu&W-2n=F);$Gh(Fzlh%4M&bO#sddIbY>p0uMIOrOs9ZD#e(`C z(&a%&lX$pLq1Gof64bP#wcZ62vGdmszhYd-96|zNc&o_|gdfHFYd{RfYBy;v$>2Ps zn-I9kjlPlv#1itvdA5C%OkWHEZ9(w`KZkCmo`}>#Fm*bwS*v_;?s(T+iNj$aa+^qt z`8UA0?biz%8R!*g>IC7HGYf8FwMdt0$>YCt_-XuQeEj@={nb7JHIN0}VtkKVsCZ4H zf8}s1cj-0VjJvu7pG6iMFF)MG@7C01jxiv?2M%8_6u54I29$p?gd_x^0Lgh$h zgF>)vM!>?Dt`O%p5C6)co@cZ3w1IyMSc6iD^GkADM|S5mY6b`gvoY~04VNE#UtYr3##nxR4u84NmoJviR)M4~i^!b)ywwwHTOkKNA~Gq!wvp?GMYmyqtgGdy&kS6-1Sz$+GhvmkM1$ zN`u^-(WoJBT3wO%xns=759$DtT6Yjjx@#tbiQnu`P5X;376_kOhE*pdk;=(65evc;d|%$akcetGrn`uD5XuX6a+ z#`kYN{dxUbqiWGyL#?e%i7oh8JMBl!e z?>8aD*RQjA?t5ELL!-qMX0lvH?rB+WCBORHfBy8#=gr64ckA=!K|`}zcG=BGnqITD zSZ7wNs}@UNZ~pU#H(wmys?-uV|FG8bD!z6sxrJ0~SRtIlv*w3y{_(|{Jn9auk65c) 
z>bpx&-Bp&@=CAay=J6#*Yrb1s!O~nNHJUEjO=b2hqV4%hrqOom#n$dCr&j6~-$}Hr zK3WW4U-JDA@Av2RG4hwia#84|0n)WRIM1BX&IdT!PFvXWB_YQ@9y;i75p zTVY17A|6ywx0;3b99a?iV^5JEP&?lY$*X|*Q` zA~BizkYTgTNpalD!NHJn7y$8Li9$HZflGprQSLtX>o2_ zWw=X-Go~iG!|gs^ZOiU`#RZH&K`Mn_r2scb)pXmZ<~nj(sxz%Fc{Ijr%^g$*yN+>N z6KAdl+IDmh;FX(14SA26>gYDF`*8d=LfJKlw^n0M&BoLkCL{IPkb@K-k;4S$JcsFxzAg*v zwkD3r2TmzaE@zURc$eaf&{+0Be9gJ&w_D_X){-wj?E0pFV14 zT-S0)EGFRaX?{M#;NT4?HeU**de~vsT+{(>9W(AyuJbd1Vw^U2Txs|gSSL28c6;6WhqihR| z^ib-h42G6gdQEz(+k$7rSLv!*56nU-drril$hqCEZdFWyFyjm_Y}PP=>CEi130L$R zzb&v_nP&5Jz*AMbNfNk#(OBWPHQ#?T-o1VQc7J%lL1e5oJFrO~yA$?GaMYlJKBnC6 zgJi7EXZ!A;I5U8XNTYo@r>@g(wN~Ug)>GS56XI2n*SYZrs25$XHx7xLuB4<|Szv6I zw`xoDH0-)P-u9t2z!RdD2Fl=q8Ry`@eZZ{n3V^ zl20lEwoB<(ZOj6!BxV%yv<5mDbw&wLft9-Y^I~2&ROhFAvtn6R!?MW1tWOn_m+TVd z=nZOV3)vbYM3`TuCn&~e2zF~)?w$d+%bY8>c`OcGvocJsW!%@SmOKgqSnrc zDPF`-Khs-bAj8-uelGM)tI~Evc?=|Uq}pv?&!&hs4a+5-DaXO}UD6O#nwhNEK}7g+ z9vcgWOA$VN4YXk6+jdG_+QXW^zkm1Pi#0iW=T00X<%Rf#BQ&EkRSQA9y0%p#MMhMq zaieOKURy)X@wG9166VK)*gW7A5b_}0y~|7^WKf3wp=$lQj}9g=$XG|g8?=g}fKjG@v3)q(`2>{=S6YKwbs<83vL*q zoG3uwHLi%?n7w(L^IGdV6#u_!ZrFu_(u|dulCx#f)#OHc{|gRqVhYtIS|2S1E-x6z zhQVbR(Vgc9v9;I(D56URo7HA_1RrV{swaiTM~r6bNMA)+E7z`CbTM!s#y!g8@sLVoK9WyETod zhmggZ8hyjW;>aa1fOYJw@rAsHd6;;W-nr4?n6}Yh9M8Z2Ra-AMZT5}REke|ci7t_% zyiWseXw)d_`OQls&&a~puTB=K1DRy>Hi1ZB)|bL2rjY{gQ2WSxKlt1-HcmG;2>tF; z7>j-4R2XZ;Y_r!a3^6JK4QY7cz;@zDASIvZGSBy+cVRvfmSp~4q~pkEV?0V}6ZT9I zKL=(p39^;Ae1XIhH!Kj$P99ZKUqr*+@Mr1x0-&6KbI|s!eR;`H*i#5!jB}wR38x9S zK|xEHf$OZ`eda!y5MPQ*=+TILiaefw8P^m+$#PW6-^YgMBZ9BW@B4vGV3G&lg14QOI_pS)Vb&l7}KE zJZHn~`OVP@C-nC*Ar`?KMxd5(ihEk@@g=yDNH~3=rYF=9q*6y0caI4E%%3{>gef6m zD*}Y@LCzP1lY})-0TQ>-ZZcYQ!BxmCAq_U_d*})X8>ELbZ1W~>djN3Ck2*)Z7eb?5 ziM$puV9BppsC;zRc*|UgtHDsEN#w+wFF-ffv&9tVbOf5`fVgtpi0F?dG^LXJZ=HN= z|Fm}>z8T-G_rM6$K$AGqqvjPaNF1*mXyhJy)ti}4JBIBg4t5;-kPJUtwMREIi0t_T z7qlKH3>9L?iGUqrM{*`#IKaJ-)#GYyk}QS>6P1;i>T}NAEisXu&uPE!NsXeBRSNXw9UN}Ys|BP~fnvTgWuF1*AYIco2npkk zIn1>|dG$pGlas}?r^smHCMRSVpdm6zJWl>1(oZ%oHZ6w+j3&`6uFSmcPJHEx)a-WP 
z$*F3>M-;e^45-j zCnkikc3<bU#lYHD6c5~S-6-gioAe0y4dGaExy@M}lgD_1HC*hPcpG`S!T>kv#jP2gAKJG*` zC)!?GOuNv?5mS3fLPjCttR7lso+qlc(tw67QNjQbUXyZleDc<*Tj?Zb3x{>ui_a<) z?v{|q$dNfrItfUbXm#F(9AAh~f%2=VkCnkt(vhJEn^w@+kFUyO;_N;>EkE7|XR|+U z6-cmg5JmGo@7~14RxsZ;7SBfjXGOwEzXWpEZH)8l1gZ|H#Ur(;GQ#-ZrHrC&nsE3 z-8Jq8LT=8NyhY(K6!^M|*FLXcxE=`^@sI5~AlPy~-VZPsssU#$6@`x`UKw+6eW(9J z_j%&wC@&s6t%O4%k7tJ{F>#esLMqQc2M`FE)+)Aebqkgx4+adoVT1XK8xD}I-PUw(O#465(%wZMdPPNF#?$^irMbI21?FflOuG%XU zf?e+^%s^$Yd~Q@d3bB=+%ScEmwXZg|GEQ2qzmUl&dqQo-7Q zxqA8dJ4Po=uQ=9u&(cCCkj4;ij>c-pllE40+NVf9-Ypq0Ott56Wr06-pU02gS*yx< dsr3{1$Lbd;910 z@zv|Uypr(CT=T~tzWHf?6GD9bI-BRdxAinMT1;Uk%Vp%AmgQFRtH1v14?n*+$awdD zeK|?MQcK`U!&=L$_}a1L7E-NYg>de} znzwKM_PaHC)E!zMu~xU#cbA~Lt1Pk2Z|P~x^9zsGe7ClOrMXOMG+nZr%IsN0+XEL) zqwUs3j6;SX;=>@Ob^Q!gGpZHkt2=pM_KA+h>OEzEH{d?;0; z=T@yRD>=oWRy-^pE}Hhf6=~!u;(;}JffmcbtnI#K`K~R`r!~*dR^tpnB6*1poKh$) z_qrh#CJ6n%2CzZHsc9Rk$u5GdHx)ygz@oCL-;A;y}dyT`wV8Nb+nH&os zL~k_dw#Fz@>a8=X7U#BAhP#9~V``#1-0tJmw(Qln8+apr2EZAS+IUb#uskoTymj&Ae1565pKlwE^(Yc=-NY)q|T zGHIXj_JX<8T49MwZFMBqQHt)_!&<$1thaqys%l-j*RCs$0$JYvP!E;FJO*b0*n2u5QaUt;yBOeT^KZn^STYr|4q~um7!=GbuEX`Vge4I=jRIy4&H!b^QF+MhaG0kMIGSQG2<@fIzIy_ z#%XiMm4%R>c$u zGtThBW(^aV&de^Ga7DlI+XBm#X*N#>JXN)uB!LSUjTL@f^ZlFg{@sUn`{NT1B4e%D zfld0GeqXrf9G39n2Bx7|x+jj@WnE_P98y(C!b)9yrwW8Rup4z6G(6EBk&W(LQ z#OQLpaY)>BB_-9$0%NnhRa>H`Vb|^PwhyfVo)EP(PzD#wI0pyr17?LMpVqwlVZEL2 z?ETX|rD@)%+D?qoV(4;6KB)-UE~Q_!F$=Jgm{G{n8t7ou86`jkR_f{x2z%jVoh7|l zu`H`$W#j8(f*V{8-87y71cX*&Wv268%5?Y6IHQ-qx6@t{W&;vu+TI>NR(W-*)YO}lXaXZch*!97olvax1u@)5H z++l3#5rOFF>>RbGaI4H#MS4J5B4A)xrx(RK z)hk0bo!kyBS&nhi%7qhVk$W~(a*J1O*RSV>_lSh@*7ZI~Jd|o>K0_f*oow$8x zzrS!OY%PR1#=lUWgkyylp|2&(%yqW#P;;MbiND2d^k_s!MOx3ljB6U9d^swu=wqW1 z5woBQSpXu#Cx@%!AL=I((ZmL!k}FY zYN?~OyGKmK%%3~_f^i{pD+0psR?Z&?KMCuf0%BmJ-K4qbg7c7WLLh|J_s|s(OUM)F z+2&0$_h1NU3yL4{bLf_KB~n|&)Fr=Wt@6pa<6Uzl4hKWzCXp8NzX0Q0|GlunK#xFE z9fViT9B~t)30APc(1_#U@V@tQ>c%Hc-tu~)qr_q1c! 
zUgBWmu@5)#vsHU^GlK*lIDElS;JV>b5&)UA;~t6X1R4k7UnjcfRoo-|_=;genQguv?q*PmqU^X~xnXPE0MH3-Yj?7$(@jea$lp_eV0&i*}Q8_*1PX7LDz`Nl5#*@cm-V z;wSOrr}8ZhVc3b3q9j`1l;x3T?8uBU`NtvRdzbA}(HG(g3jB4_dhv3ty@NYxgV4XH zkiN>9=cb%IE`Q)4%=+F)2zE#`C*V+8OuNw85mS3mmPR3ptsYv21}3Vw(tz46QQ82Z zUK4$FJp9(FTj?-f4}*8wj?F6+^p@nv@{y5EItg-_2zcJJ9B+#FF$vee5i7%^q&P!m zHZ7vzpI*Yp#8`fQ4u8H6E^2?;Dv-3}ATsBD-s*|9tq=@uYLJhp&Wc4KrN`Jvqk7OK zz&8JunqSCYDjVs}8N(+IBP3j|c8se=mzJ+D6c6q5d@}3^8{G|(q%cg_*@V853L)9J zuF{-gMKI8a9p}M824NJ#g1QgXqSR|3biCG@{XBd?=Aog^3<`Vp^I-cxZXx87{sVI~ zT4)IPPDhsfydxs+>rp4{30sSuoy`{{Cq#GX3UN0)xwo5tL3hp<1>WIeVx=r1dzNe& zElMS{+Z!R?QZVls2{4>x+2@rn*X|m311&elO(dj6_IxM6D zKHd*V7_b4aEfs~cCLSHbas8g*q5Hh>ic}eIpH{;4%z@58rkKRaDIuE&o-jzPjBgcN zSjPpO5?BLi!Ejgui}2M3J}=Th`OSiL+%0hotV`S^YTU9=gm5eD?33Ulp@>!75V^-H z;epDL>D{kiL@cuK=u0D{4-advPz!0Hr!WImzVg4(fhZ(vel8<9rPRLK*vgn`xqi8n zaRmv*_Hc@6P~M36TX-Kwt*zg{g^law)AyxL=wETH^B$*#t{|l$-kg!ukSDFK$h%LG yeEeWCAem~<P^y)YL5 From 859cb7ac9d024c3c8d172ae1c1f8c2a5530cfd96 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 24 Feb 2022 16:55:32 +0100 Subject: [PATCH 097/162] [DoiBoost AR] changed test resource to be sure the result will always have EMBARGO as value for AccessRight --- .../dnetlib/doiboost/crossref/publication_license_embargo.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json index 47ca55f34..788946fea 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json @@ -1456,7 +1456,7 @@ "issued": { "date-parts": [ [ -2021, +3021, 2, 22 ] From b0f369dc78a7b2af64c647a8d042c55315e776d7 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 24 Feb 2022 
17:08:24 +0100 Subject: [PATCH 098/162] bug fix in the DedupRecordFactory, DataInfo set before merge --- .../dhp/oa/dedup/DedupRecordFactory.java | 3 +- .../dnetlib/dhp/dedup/conf/orp.new.conf.json | 214 ++++++++ .../dnetlib/dhp/dedup/conf/pub.new.conf.json | 475 ++++++++++++++++++ 3 files changed, 691 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.new.conf.json create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.new.conf.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index d65853aff..b8ae4ca02 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -77,6 +77,7 @@ public class DedupRecordFactory { throws IllegalAccessException, InstantiationException { T entity = clazz.newInstance(); + entity.setDataInfo(dataInfo); final Collection dates = Lists.newArrayList(); final List> authors = Lists.newArrayList(); @@ -106,7 +107,7 @@ public class DedupRecordFactory { entity.setId(id); entity.setLastupdatetimestamp(ts); - entity.setDataInfo(dataInfo); + entity.getDataInfo().setDeletedbyinference(false); return entity; } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.new.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.new.conf.json new file mode 100644 index 000000000..b0cdade5b --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.new.conf.json @@ -0,0 +1,214 @@ +{ + "wf": { + "threshold" : "0.99", + "dedupRun" : "001", + "entityType" : "result", + "subEntityType" : "resulttype", + 
"subEntityValue" : "otherresearchproduct", + "orderField" : "title", + "queueMaxSize" : "100", + "groupMaxSize" : "100", + "maxChildren" : "100", + "slidingWindowSize" : "100", + "rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ], + "includeChildren" : "true", + "idPath" : "$.id", + "maxIterations" : 20 + }, + "pace": { + "clustering": [ + { + "name": "wordsStatsSuffixPrefixChain", + "fields": [ + "title" + ], + "params": { + "mod": "10" + } + }, + { + "name": "lowercase", + "fields": [ + "doi", + "altdoi" + ], + "params": { + "collapseOn:pid": "0" + } + } + ], + "decisionTree": { + "start": { + "fields": [ + { + "field": "pid", + "comparator": "jsonListMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "jpath_value": "$.value", + "jpath_classid": "$.qualifier.classid", + "mode": "count" + } + } + ], + "threshold": 1.0, + "aggregation": "MAX", + "positive": "MATCH", + "negative": "pidVSaltid", + "undefined": "pidVSaltid", + "ignoreUndefined": "false" + }, + "pidVSaltid": { + "fields": [ + { + "field": "pid", + "comparator": "jsonListMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "jpath_value": "$.value", + "jpath_classid": "$.qualifier.classid", + "crossCompare": "alternateid", + "mode": "count" + } + } + ], + "threshold": 1.0, + "aggregation": "MAX", + "positive": "softCheck", + "negative": "earlyExits", + "undefined": "earlyExits", + "ignoreUndefined": "true" + }, + "softCheck": { + "fields": [ + { + "field": "title", + "comparator": "levensteinTitle", + "weight": 1.0, + "countIfUndefined": "true", + "params": {} + 
} + ], + "threshold": 0.9, + "aggregation": "AVG", + "positive": "MATCH", + "negative": "NO_MATCH", + "undefined": "NO_MATCH", + "ignoreUndefined": "true" + }, + "earlyExits": { + "fields": [ + { + "field": "title", + "comparator": "titleVersionMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + }, + { + "field": "authors", + "comparator": "sizeMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + } + ], + "threshold": 1.0, + "aggregation": "AND", + "positive": "strongCheck", + "negative": "NO_MATCH", + "undefined": "strongCheck", + "ignoreUndefined": "false" + }, + "strongCheck": { + "fields": [ + { + "field": "title", + "comparator": "levensteinTitle", + "weight": 1.0, + "countIfUndefined": "true", + "params": {} + } + ], + "threshold": 0.99, + "aggregation": "AVG", + "positive": "surnames", + "negative": "NO_MATCH", + "undefined": "NO_MATCH", + "ignoreUndefined": "true" + }, + "surnames": { + "fields": [ + { + "field": "authors", + "comparator": "authorsMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "surname_th": 0.75, + "fullname_th": 0.75, + "mode": "surname" + } + } + ], + "threshold": 0.6, + "aggregation": "MAX", + "positive": "MATCH", + "negative": "NO_MATCH", + "undefined": "MATCH", + "ignoreUndefined": "true" + } + }, + "model": [ + { + "name": "doi", + "type": "String", + "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value" + }, + { + "name": "altdoi", + "type": "String", + "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value" + }, + { + "name": "pid", + "type": "JSON", + "path": "$.instance[*].pid[*]", + "overrideMatch": "true" + }, + { + "name": "alternateid", + "type": "JSON", + "path": "$.instance[*].alternateIdentifier[*]", + "overrideMatch": "true" + }, + { + "name": "title", + "type": "String", + "path": "$.title[?(@.qualifier.classid == 'main title')].value", + "length": 250, + "size": 5 + }, + { + "name": "authors", + "type": "List", + 
"path": "$.author[*].fullname", + "size": 200 + }, + { + "name": "resulttype", + "type": "String", + "path": "$.resulttype.classid" + }, + { + "name": "instance", + "type": "List", + "path": "$.instance[*].instancetype.classname" + } + ], + "blacklists": {}, + "synonyms": {} + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.new.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.new.conf.json new file mode 100644 index 000000000..89e492e39 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.new.conf.json @@ -0,0 +1,475 @@ +{ + "wf": { + "threshold": "0.99", + "dedupRun": "001", + "entityType": "result", + "subEntityType": "resulttype", + "subEntityValue": "publication", + "orderField": "title", + "queueMaxSize": "200", + "groupMaxSize": "100", + "maxChildren": "100", + "slidingWindowSize": "50", + "rootBuilder": [ + "result", + "resultProject_outcome_isProducedBy", + "resultResult_publicationDataset_isRelatedTo", + "resultResult_similarity_isAmongTopNSimilarDocuments", + "resultResult_similarity_hasAmongTopNSimilarDocuments", + "resultOrganization_affiliation_isAffiliatedWith", + "resultResult_part_hasPart", + "resultResult_part_isPartOf", + "resultResult_supplement_isSupplementTo", + "resultResult_supplement_isSupplementedBy", + "resultResult_version_isVersionOf" + ], + "includeChildren": "true", + "maxIterations": 20, + "idPath": "$.id" + }, + "pace": { + "clustering": [ + { + "name": "wordsStatsSuffixPrefixChain", + "fields": [ + "title" + ], + "params": { + "mod": "10" + } + }, + { + "name": "lowercase", + "fields": [ + "doi", + "altdoi" + ], + "params": { + "collapseOn:pid": "0" + } + } + ], + "decisionTree": { + "start": { + "fields": [ + { + "field": "pid", + "comparator": "jsonListMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "jpath_value": "$.value", + 
"jpath_classid": "$.qualifier.classid", + "mode": "count" + } + } + ], + "threshold": 1.0, + "aggregation": "MAX", + "positive": "MATCH", + "negative": "instanceTypeCheck", + "undefined": "instanceTypeCheck", + "ignoreUndefined": "false" + }, + "instanceTypeCheck": { + "fields": [ + { + "field": "instance", + "comparator": "instanceTypeMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + } + ], + "threshold": 0.5, + "aggregation": "MAX", + "positive": "pidVSaltid", + "negative": "NO_MATCH", + "undefined": "pidVSaltid", + "ignoreUndefined": "true" + }, + "pidVSaltid": { + "fields": [ + { + "field": "pid", + "comparator": "jsonListMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "jpath_value": "$.value", + "jpath_classid": "$.qualifier.classid", + "crossCompare": "alternateid", + "mode": "count" + } + } + ], + "threshold": 1.0, + "aggregation": "MAX", + "positive": "softCheck", + "negative": "earlyExits", + "undefined": "earlyExits", + "ignoreUndefined": "true" + }, + "softCheck": { + "fields": [ + { + "field": "title", + "comparator": "levensteinTitle", + "weight": 1.0, + "countIfUndefined": "true", + "params": {} + } + ], + "threshold": 0.9, + "aggregation": "AVG", + "positive": "MATCH", + "negative": "NO_MATCH", + "undefined": "NO_MATCH", + "ignoreUndefined": "true" + }, + "earlyExits": { + "fields": [ + { + "field": "title", + "comparator": "titleVersionMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + }, + { + "field": "authors", + "comparator": "sizeMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + } + ], + "threshold": 1.0, + "aggregation": "AND", + "positive": "strongCheck", + "negative": "NO_MATCH", + "undefined": "strongCheck", + "ignoreUndefined": "false" + }, + "strongCheck": { + "fields": [ + { + "field": "title", + "comparator": "levensteinTitle", + "weight": 1.0, + "countIfUndefined": "true", + "params": {} + } + ], + "threshold": 0.99, + "aggregation": "AVG", + 
"positive": "surnames", + "negative": "NO_MATCH", + "undefined": "NO_MATCH", + "ignoreUndefined": "true" + }, + "surnames": { + "fields": [ + { + "field": "authors", + "comparator": "authorsMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "surname_th": 0.75, + "fullname_th": 0.75, + "mode": "surname" + } + } + ], + "threshold": 0.6, + "aggregation": "MAX", + "positive": "MATCH", + "negative": "NO_MATCH", + "undefined": "MATCH", + "ignoreUndefined": "true" + } + }, + "model": [ + { + "name": "doi", + "type": "String", + "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value" + }, + { + "name": "altdoi", + "type": "String", + "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value" + }, + { + "name": "pid", + "type": "JSON", + "path": "$.instance[*].pid[*]", + "overrideMatch": "true" + }, + { + "name": "alternateid", + "type": "JSON", + "path": "$.instance[*].alternateIdentifier[*]", + "overrideMatch": "true" + }, + { + "name": "title", + "type": "String", + "path": "$.title[?(@.qualifier.classid == 'main title')].value", + "length": 250, + "size": 5 + }, + { + "name": "authors", + "type": "List", + "path": "$.author[*].fullname", + "size": 200 + }, + { + "name": "resulttype", + "type": "String", + "path": "$.resulttype.classid" + }, + { + "name": "instance", + "type": "List", + "path": "$.instance[*].instancetype.classname" + } + ], + "blacklists": { + "title": [ + "(?i)^Data Management Plan", + "^Inside Front Cover$", + "(?i)^Poster presentations$", + "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$", + "^Problems with perinatal pathology\\.?$", + "(?i)^Cases? of Puerperal Convulsions$", + "(?i)^Operative Gyna?ecology$", + "(?i)^Mind the gap\\!?\\:?$", + "^Chronic fatigue syndrome\\.?$", + "^Cartas? ao editor Letters? 
to the Editor$", + "^Note from the Editor$", + "^Anesthesia Abstract$", + "^Annual report$", + "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$", + "(?i)^Graph and Table of Infectious Diseases?$", + "^Presentation$", + "(?i)^Reviews and Information on Publications$", + "(?i)^PUBLIC HEALTH SERVICES?$", + "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$", + "(?i)^Adrese autora$", + "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$", + "(?i)^Acknowledgement to Referees$", + "(?i)^Behçet's disease\\.?$", + "(?i)^Isolation and identification of restriction endonuclease.*$", + "(?i)^CEREBROVASCULAR DISEASES?.?$", + "(?i)^Screening for abdominal aortic aneurysms?\\.?$", + "^Event management$", + "(?i)^Breakfast and Crohn's disease.*\\.?$", + "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$", + "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$", + "^Gushi hakubutsugaku$", + "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$", + "^Intestinal spirocha?etosis$", + "^Treatment of Rodent Ulcer$", + "(?i)^\\W*Cloud Computing\\W*$", + "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$", + "^Free Communications, Poster Presentations: Session [A-F]$", + "^“The Historical Aspects? 
of Quackery\\.?”$", + "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$", + "^P(er|re)-Mile Premiums for Auto Insurance\\.?$", + "(?i)^Case Report$", + "^Boletín Informativo$", + "(?i)^Glioblastoma Multiforme$", + "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$", + "^Zaměstnanecké výhody$", + "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$", + "(?i)^Carotid body tumours?\\.?$", + "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$", + "^Avant-propos$", + "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$", + "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$", + "(?i)^PUBLIC HEALTH VERSUS THE STATE$", + "^Viñetas de Cortázar$", + "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$", + "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$", + "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$", + "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$", + "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$", + "^Aus der AGMB$", + "^Znanstveno-stručni prilozi$", + "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$", + "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$", + "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$", + "^Finanční analýza podniku$", + "^Financial analysis( of business)?$", + "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$", + "^Jikken nihon shūshinsho$", + "(?i)^CORONER('|s)(s|') INQUESTS$", + "(?i)^(Μελέτη παραγόντων )?risk 
management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$", + "(?i)^Consultants' contract(s)?$", + "(?i)^Upute autorima$", + "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$", + "^Joshi shin kokubun$", + "^Kōtō shōgaku dokuhon nōson'yō$", + "^Jinjō shōgaku shōka$", + "^Shōgaku shūjichō$", + "^Nihon joshi dokuhon$", + "^Joshi shin dokuhon$", + "^Chūtō kanbun dokuhon$", + "^Wabun dokuhon$", + "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$", + "(?i)^cardiac rehabilitation$", + "(?i)^Analytical summary$", + "^Thesaurus resolutionum Sacrae Congregationis Concilii$", + "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$", + "^Prikazi i osvrti$", + "^Rodinný dům s provozovnou$", + "^Family house with an establishment$", + "^Shinsei chūtō shin kokugun$", + "^Pulmonary alveolar proteinosis(\\.?)$", + "^Shinshū kanbun$", + "^Viñeta(s?) de Rodríguez$", + "(?i)^RUBRIKA UREDNIKA$", + "^A Matching Model of the Academic Publication Market$", + "^Yōgaku kōyō$", + "^Internetový marketing$", + "^Internet marketing$", + "^Chūtō kokugo dokuhon$", + "^Kokugo dokuhon$", + "^Antibiotic Cover for Dental Extraction(s?)$", + "^Strategie podniku$", + "^Strategy of an Enterprise$", + "(?i)^respiratory disease(s?)(\\.?)$", + "^Award(s?) for Gallantry in Civil Defence$", + "^Podniková kultura$", + "^Corporate Culture$", + "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$", + "^Pracovní motivace$", + "^Work Motivation$", + "^Kaitei kōtō jogaku dokuhon$", + "^Konsolidovaná účetní závěrka$", + "^Consolidated Financial Statements$", + "(?i)^intracranial tumour(s?)$", + "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$", + "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$", + "^Housing Market Dynamics(\\:|\\.) 
On the Contribution of Income Shocks and Credit Constraint(s?)$", + "^\\[Funciones auxiliares de la música en Radio París,.*\\]$", + "^Úroveň motivačního procesu jako způsobu vedení lidí$", + "^The level of motivation process as a leadership$", + "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$", + "(?i)^news and events$", + "(?i)^NOVOSTI I DOGAĐAJI$", + "^Sansū no gakushū$", + "^Posouzení informačního systému firmy a návrh změn$", + "^Information System Assessment and Proposal for ICT Modification$", + "^Stresové zatížení pracovníků ve vybrané profesi$", + "^Stress load in a specific job$", + "^Sunday: Poster Sessions, Pt.*$", + "^Monday: Poster Sessions, Pt.*$", + "^Wednesday: Poster Sessions, Pt.*", + "^Tuesday: Poster Sessions, Pt.*$", + "^Analýza reklamy$", + "^Analysis of advertising$", + "^Shōgaku shūshinsho$", + "^Shōgaku sansū$", + "^Shintei joshi kokubun$", + "^Taishō joshi kokubun dokuhon$", + "^Joshi kokubun$", + "^Účetní uzávěrka a účetní závěrka v ČR$", + "(?i)^The \"?Causes\"? 
of Cancer$", + "^Normas para la publicación de artículos$", + "^Editor('|s)(s|') [Rr]eply$", + "^Editor(’|s)(s|’) letter$", + "^Redaktoriaus žodis$", + "^DISCUSSION ON THE PRECEDING PAPER$", + "^Kōtō shōgaku shūshinsho jidōyō$", + "^Shōgaku nihon rekishi$", + "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$", + "^Préface$", + "^Occupational [Hh]ealth [Ss]ervices.$", + "^In Memoriam Professor Toshiyuki TAKESHIMA$", + "^Účetní závěrka ve vybraném podniku.*$", + "^Financial statements in selected company$", + "^Abdominal [Aa]ortic [Aa]neurysms.*$", + "^Pseudomyxoma peritonei$", + "^Kazalo autora$", + "(?i)^uvodna riječ$", + "^Motivace jako způsob vedení lidí$", + "^Motivation as a leadership$", + "^Polyfunkční dům$", + "^Multi\\-funkcional building$", + "^Podnikatelský plán$", + "(?i)^Podnikatelský záměr$", + "(?i)^Business Plan$", + "^Oceňování nemovitostí$", + "^Marketingová komunikace$", + "^Marketing communication$", + "^Sumario Analítico$", + "^Riječ uredništva$", + "^Savjetovanja i priredbe$", + "^Índice$", + "^(Starobosanski nadpisi).*$", + "^Vzdělávání pracovníků v organizaci$", + "^Staff training in organization$", + "^(Life Histories of North American Geometridae).*$", + "^Strategická analýza podniku$", + "^Strategic Analysis of an Enterprise$", + "^Sadržaj$", + "^Upute suradnicima$", + "^Rodinný dům$", + "(?i)^Fami(l)?ly house$", + "^Upute autorima$", + "^Strategic Analysis$", + "^Finanční analýza vybraného podniku$", + "^Finanční analýza$", + "^Riječ urednika$", + "(?i)^Content(s?)$", + "(?i)^Inhalt$", + "^Jinjō shōgaku shūshinsho jidōyō$", + "(?i)^Index$", + "^Chūgaku kokubun kyōkasho$", + "^Retrato de una mujer$", + "^Retrato de un hombre$", + "^Kōtō shōgaku dokuhon$", + "^Shotōka kokugo$", + "^Shōgaku dokuhon$", + "^Jinjō shōgaku kokugo dokuhon$", + "^Shinsei kokugo dokuhon$", + "^Teikoku dokuhon$", + "^Instructions to Authors$", + "^KİTAP TAHLİLİ$", + "^PRZEGLĄD PIŚMIENNICTWA$", + "(?i)^Presentación$", + "^İçindekiler$", 
+ "(?i)^Tabl?e of contents$", + "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$", + "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*", + "^Editorial( Board)?$", + "(?i)^Editorial \\(English\\)$", + "^Editörden$", + "^(Corpus Oral Dialectal \\(COD\\)\\.).*$", + "^(Kiri Karl Morgensternile).*$", + "^(\\[Eksliibris Aleksandr).*\\]$", + "^(\\[Eksliibris Aleksandr).*$", + "^(Eksliibris Aleksandr).*$", + "^(Kiri A\\. de Vignolles).*$", + "^(2 kirja Karl Morgensternile).*$", + "^(Pirita kloostri idaosa arheoloogilised).*$", + "^(Kiri tundmatule).*$", + "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$", + "^(Eksliibris Nikolai Birukovile).*$", + "^(Eksliibris Nikolai Issakovile).*$", + "^(WHP Cruise Summary Information of section).*$", + "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$", + "^(Measurement of the spin\\-dependent structure function).*", + "(?i)^.*authors['’′]? reply\\.?$", + "(?i)^.*authors['’′]? 
response\\.?$", + "^Data [mM]anagement [sS]ervices\\.$", + "Research and Advanced Technology for Digital Libraries" + ] + }, + "synonyms": {} + } +} \ No newline at end of file From 8991d097b466b587a83ad62160798dfaaf20bb7a Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 24 Feb 2022 17:08:24 +0100 Subject: [PATCH 099/162] bug fix in the DedupRecordFactory, DataInfo set before merge --- .../dhp/oa/dedup/DedupRecordFactory.java | 3 +- .../dnetlib/dhp/dedup/conf/orp.new.conf.json | 214 ++++++++ .../dnetlib/dhp/dedup/conf/pub.new.conf.json | 475 ++++++++++++++++++ 3 files changed, 691 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.new.conf.json create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.new.conf.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index d65853aff..b8ae4ca02 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -77,6 +77,7 @@ public class DedupRecordFactory { throws IllegalAccessException, InstantiationException { T entity = clazz.newInstance(); + entity.setDataInfo(dataInfo); final Collection dates = Lists.newArrayList(); final List> authors = Lists.newArrayList(); @@ -106,7 +107,7 @@ public class DedupRecordFactory { entity.setId(id); entity.setLastupdatetimestamp(ts); - entity.setDataInfo(dataInfo); + entity.getDataInfo().setDeletedbyinference(false); return entity; } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.new.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.new.conf.json new file mode 100644 index 
000000000..b0cdade5b --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.new.conf.json @@ -0,0 +1,214 @@ +{ + "wf": { + "threshold" : "0.99", + "dedupRun" : "001", + "entityType" : "result", + "subEntityType" : "resulttype", + "subEntityValue" : "otherresearchproduct", + "orderField" : "title", + "queueMaxSize" : "100", + "groupMaxSize" : "100", + "maxChildren" : "100", + "slidingWindowSize" : "100", + "rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ], + "includeChildren" : "true", + "idPath" : "$.id", + "maxIterations" : 20 + }, + "pace": { + "clustering": [ + { + "name": "wordsStatsSuffixPrefixChain", + "fields": [ + "title" + ], + "params": { + "mod": "10" + } + }, + { + "name": "lowercase", + "fields": [ + "doi", + "altdoi" + ], + "params": { + "collapseOn:pid": "0" + } + } + ], + "decisionTree": { + "start": { + "fields": [ + { + "field": "pid", + "comparator": "jsonListMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "jpath_value": "$.value", + "jpath_classid": "$.qualifier.classid", + "mode": "count" + } + } + ], + "threshold": 1.0, + "aggregation": "MAX", + "positive": "MATCH", + "negative": "pidVSaltid", + "undefined": "pidVSaltid", + "ignoreUndefined": "false" + }, + "pidVSaltid": { + "fields": [ + { + "field": "pid", + "comparator": "jsonListMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "jpath_value": "$.value", + "jpath_classid": "$.qualifier.classid", + "crossCompare": "alternateid", + "mode": "count" + } + } + ], + "threshold": 1.0, + "aggregation": 
"MAX", + "positive": "softCheck", + "negative": "earlyExits", + "undefined": "earlyExits", + "ignoreUndefined": "true" + }, + "softCheck": { + "fields": [ + { + "field": "title", + "comparator": "levensteinTitle", + "weight": 1.0, + "countIfUndefined": "true", + "params": {} + } + ], + "threshold": 0.9, + "aggregation": "AVG", + "positive": "MATCH", + "negative": "NO_MATCH", + "undefined": "NO_MATCH", + "ignoreUndefined": "true" + }, + "earlyExits": { + "fields": [ + { + "field": "title", + "comparator": "titleVersionMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + }, + { + "field": "authors", + "comparator": "sizeMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + } + ], + "threshold": 1.0, + "aggregation": "AND", + "positive": "strongCheck", + "negative": "NO_MATCH", + "undefined": "strongCheck", + "ignoreUndefined": "false" + }, + "strongCheck": { + "fields": [ + { + "field": "title", + "comparator": "levensteinTitle", + "weight": 1.0, + "countIfUndefined": "true", + "params": {} + } + ], + "threshold": 0.99, + "aggregation": "AVG", + "positive": "surnames", + "negative": "NO_MATCH", + "undefined": "NO_MATCH", + "ignoreUndefined": "true" + }, + "surnames": { + "fields": [ + { + "field": "authors", + "comparator": "authorsMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "surname_th": 0.75, + "fullname_th": 0.75, + "mode": "surname" + } + } + ], + "threshold": 0.6, + "aggregation": "MAX", + "positive": "MATCH", + "negative": "NO_MATCH", + "undefined": "MATCH", + "ignoreUndefined": "true" + } + }, + "model": [ + { + "name": "doi", + "type": "String", + "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value" + }, + { + "name": "altdoi", + "type": "String", + "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value" + }, + { + "name": "pid", + "type": "JSON", + "path": "$.instance[*].pid[*]", + "overrideMatch": "true" + }, + { + "name": "alternateid", + "type": 
"JSON", + "path": "$.instance[*].alternateIdentifier[*]", + "overrideMatch": "true" + }, + { + "name": "title", + "type": "String", + "path": "$.title[?(@.qualifier.classid == 'main title')].value", + "length": 250, + "size": 5 + }, + { + "name": "authors", + "type": "List", + "path": "$.author[*].fullname", + "size": 200 + }, + { + "name": "resulttype", + "type": "String", + "path": "$.resulttype.classid" + }, + { + "name": "instance", + "type": "List", + "path": "$.instance[*].instancetype.classname" + } + ], + "blacklists": {}, + "synonyms": {} + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.new.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.new.conf.json new file mode 100644 index 000000000..89e492e39 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.new.conf.json @@ -0,0 +1,475 @@ +{ + "wf": { + "threshold": "0.99", + "dedupRun": "001", + "entityType": "result", + "subEntityType": "resulttype", + "subEntityValue": "publication", + "orderField": "title", + "queueMaxSize": "200", + "groupMaxSize": "100", + "maxChildren": "100", + "slidingWindowSize": "50", + "rootBuilder": [ + "result", + "resultProject_outcome_isProducedBy", + "resultResult_publicationDataset_isRelatedTo", + "resultResult_similarity_isAmongTopNSimilarDocuments", + "resultResult_similarity_hasAmongTopNSimilarDocuments", + "resultOrganization_affiliation_isAffiliatedWith", + "resultResult_part_hasPart", + "resultResult_part_isPartOf", + "resultResult_supplement_isSupplementTo", + "resultResult_supplement_isSupplementedBy", + "resultResult_version_isVersionOf" + ], + "includeChildren": "true", + "maxIterations": 20, + "idPath": "$.id" + }, + "pace": { + "clustering": [ + { + "name": "wordsStatsSuffixPrefixChain", + "fields": [ + "title" + ], + "params": { + "mod": "10" + } + }, + { + "name": "lowercase", + "fields": [ + 
"doi", + "altdoi" + ], + "params": { + "collapseOn:pid": "0" + } + } + ], + "decisionTree": { + "start": { + "fields": [ + { + "field": "pid", + "comparator": "jsonListMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "jpath_value": "$.value", + "jpath_classid": "$.qualifier.classid", + "mode": "count" + } + } + ], + "threshold": 1.0, + "aggregation": "MAX", + "positive": "MATCH", + "negative": "instanceTypeCheck", + "undefined": "instanceTypeCheck", + "ignoreUndefined": "false" + }, + "instanceTypeCheck": { + "fields": [ + { + "field": "instance", + "comparator": "instanceTypeMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + } + ], + "threshold": 0.5, + "aggregation": "MAX", + "positive": "pidVSaltid", + "negative": "NO_MATCH", + "undefined": "pidVSaltid", + "ignoreUndefined": "true" + }, + "pidVSaltid": { + "fields": [ + { + "field": "pid", + "comparator": "jsonListMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "jpath_value": "$.value", + "jpath_classid": "$.qualifier.classid", + "crossCompare": "alternateid", + "mode": "count" + } + } + ], + "threshold": 1.0, + "aggregation": "MAX", + "positive": "softCheck", + "negative": "earlyExits", + "undefined": "earlyExits", + "ignoreUndefined": "true" + }, + "softCheck": { + "fields": [ + { + "field": "title", + "comparator": "levensteinTitle", + "weight": 1.0, + "countIfUndefined": "true", + "params": {} + } + ], + "threshold": 0.9, + "aggregation": "AVG", + "positive": "MATCH", + "negative": "NO_MATCH", + "undefined": "NO_MATCH", + "ignoreUndefined": "true" + }, + "earlyExits": { + "fields": [ + { + "field": "title", + "comparator": "titleVersionMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + }, + { + "field": "authors", + "comparator": "sizeMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": {} + } + ], + "threshold": 1.0, + "aggregation": "AND", + "positive": "strongCheck", + "negative": "NO_MATCH", + 
"undefined": "strongCheck", + "ignoreUndefined": "false" + }, + "strongCheck": { + "fields": [ + { + "field": "title", + "comparator": "levensteinTitle", + "weight": 1.0, + "countIfUndefined": "true", + "params": {} + } + ], + "threshold": 0.99, + "aggregation": "AVG", + "positive": "surnames", + "negative": "NO_MATCH", + "undefined": "NO_MATCH", + "ignoreUndefined": "true" + }, + "surnames": { + "fields": [ + { + "field": "authors", + "comparator": "authorsMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "surname_th": 0.75, + "fullname_th": 0.75, + "mode": "surname" + } + } + ], + "threshold": 0.6, + "aggregation": "MAX", + "positive": "MATCH", + "negative": "NO_MATCH", + "undefined": "MATCH", + "ignoreUndefined": "true" + } + }, + "model": [ + { + "name": "doi", + "type": "String", + "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value" + }, + { + "name": "altdoi", + "type": "String", + "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value" + }, + { + "name": "pid", + "type": "JSON", + "path": "$.instance[*].pid[*]", + "overrideMatch": "true" + }, + { + "name": "alternateid", + "type": "JSON", + "path": "$.instance[*].alternateIdentifier[*]", + "overrideMatch": "true" + }, + { + "name": "title", + "type": "String", + "path": "$.title[?(@.qualifier.classid == 'main title')].value", + "length": 250, + "size": 5 + }, + { + "name": "authors", + "type": "List", + "path": "$.author[*].fullname", + "size": 200 + }, + { + "name": "resulttype", + "type": "String", + "path": "$.resulttype.classid" + }, + { + "name": "instance", + "type": "List", + "path": "$.instance[*].instancetype.classname" + } + ], + "blacklists": { + "title": [ + "(?i)^Data Management Plan", + "^Inside Front Cover$", + "(?i)^Poster presentations$", + "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$", + "^Problems with perinatal pathology\\.?$", + "(?i)^Cases? 
of Puerperal Convulsions$", + "(?i)^Operative Gyna?ecology$", + "(?i)^Mind the gap\\!?\\:?$", + "^Chronic fatigue syndrome\\.?$", + "^Cartas? ao editor Letters? to the Editor$", + "^Note from the Editor$", + "^Anesthesia Abstract$", + "^Annual report$", + "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$", + "(?i)^Graph and Table of Infectious Diseases?$", + "^Presentation$", + "(?i)^Reviews and Information on Publications$", + "(?i)^PUBLIC HEALTH SERVICES?$", + "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$", + "(?i)^Adrese autora$", + "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$", + "(?i)^Acknowledgement to Referees$", + "(?i)^Behçet's disease\\.?$", + "(?i)^Isolation and identification of restriction endonuclease.*$", + "(?i)^CEREBROVASCULAR DISEASES?.?$", + "(?i)^Screening for abdominal aortic aneurysms?\\.?$", + "^Event management$", + "(?i)^Breakfast and Crohn's disease.*\\.?$", + "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$", + "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$", + "^Gushi hakubutsugaku$", + "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$", + "^Intestinal spirocha?etosis$", + "^Treatment of Rodent Ulcer$", + "(?i)^\\W*Cloud Computing\\W*$", + "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$", + "^Free Communications, Poster Presentations: Session [A-F]$", + "^“The Historical Aspects? 
of Quackery\\.?”$", + "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$", + "^P(er|re)-Mile Premiums for Auto Insurance\\.?$", + "(?i)^Case Report$", + "^Boletín Informativo$", + "(?i)^Glioblastoma Multiforme$", + "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$", + "^Zaměstnanecké výhody$", + "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$", + "(?i)^Carotid body tumours?\\.?$", + "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$", + "^Avant-propos$", + "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$", + "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$", + "(?i)^PUBLIC HEALTH VERSUS THE STATE$", + "^Viñetas de Cortázar$", + "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$", + "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$", + "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$", + "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$", + "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$", + "^Aus der AGMB$", + "^Znanstveno-stručni prilozi$", + "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$", + "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$", + "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$", + "^Finanční analýza podniku$", + "^Financial analysis( of business)?$", + "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$", + "^Jikken nihon shūshinsho$", + "(?i)^CORONER('|s)(s|') INQUESTS$", + "(?i)^(Μελέτη παραγόντων )?risk 
management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$", + "(?i)^Consultants' contract(s)?$", + "(?i)^Upute autorima$", + "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$", + "^Joshi shin kokubun$", + "^Kōtō shōgaku dokuhon nōson'yō$", + "^Jinjō shōgaku shōka$", + "^Shōgaku shūjichō$", + "^Nihon joshi dokuhon$", + "^Joshi shin dokuhon$", + "^Chūtō kanbun dokuhon$", + "^Wabun dokuhon$", + "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$", + "(?i)^cardiac rehabilitation$", + "(?i)^Analytical summary$", + "^Thesaurus resolutionum Sacrae Congregationis Concilii$", + "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$", + "^Prikazi i osvrti$", + "^Rodinný dům s provozovnou$", + "^Family house with an establishment$", + "^Shinsei chūtō shin kokugun$", + "^Pulmonary alveolar proteinosis(\\.?)$", + "^Shinshū kanbun$", + "^Viñeta(s?) de Rodríguez$", + "(?i)^RUBRIKA UREDNIKA$", + "^A Matching Model of the Academic Publication Market$", + "^Yōgaku kōyō$", + "^Internetový marketing$", + "^Internet marketing$", + "^Chūtō kokugo dokuhon$", + "^Kokugo dokuhon$", + "^Antibiotic Cover for Dental Extraction(s?)$", + "^Strategie podniku$", + "^Strategy of an Enterprise$", + "(?i)^respiratory disease(s?)(\\.?)$", + "^Award(s?) for Gallantry in Civil Defence$", + "^Podniková kultura$", + "^Corporate Culture$", + "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$", + "^Pracovní motivace$", + "^Work Motivation$", + "^Kaitei kōtō jogaku dokuhon$", + "^Konsolidovaná účetní závěrka$", + "^Consolidated Financial Statements$", + "(?i)^intracranial tumour(s?)$", + "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$", + "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$", + "^Housing Market Dynamics(\\:|\\.) 
On the Contribution of Income Shocks and Credit Constraint(s?)$", + "^\\[Funciones auxiliares de la música en Radio París,.*\\]$", + "^Úroveň motivačního procesu jako způsobu vedení lidí$", + "^The level of motivation process as a leadership$", + "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$", + "(?i)^news and events$", + "(?i)^NOVOSTI I DOGAĐAJI$", + "^Sansū no gakushū$", + "^Posouzení informačního systému firmy a návrh změn$", + "^Information System Assessment and Proposal for ICT Modification$", + "^Stresové zatížení pracovníků ve vybrané profesi$", + "^Stress load in a specific job$", + "^Sunday: Poster Sessions, Pt.*$", + "^Monday: Poster Sessions, Pt.*$", + "^Wednesday: Poster Sessions, Pt.*", + "^Tuesday: Poster Sessions, Pt.*$", + "^Analýza reklamy$", + "^Analysis of advertising$", + "^Shōgaku shūshinsho$", + "^Shōgaku sansū$", + "^Shintei joshi kokubun$", + "^Taishō joshi kokubun dokuhon$", + "^Joshi kokubun$", + "^Účetní uzávěrka a účetní závěrka v ČR$", + "(?i)^The \"?Causes\"? 
of Cancer$", + "^Normas para la publicación de artículos$", + "^Editor('|s)(s|') [Rr]eply$", + "^Editor(’|s)(s|’) letter$", + "^Redaktoriaus žodis$", + "^DISCUSSION ON THE PRECEDING PAPER$", + "^Kōtō shōgaku shūshinsho jidōyō$", + "^Shōgaku nihon rekishi$", + "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$", + "^Préface$", + "^Occupational [Hh]ealth [Ss]ervices.$", + "^In Memoriam Professor Toshiyuki TAKESHIMA$", + "^Účetní závěrka ve vybraném podniku.*$", + "^Financial statements in selected company$", + "^Abdominal [Aa]ortic [Aa]neurysms.*$", + "^Pseudomyxoma peritonei$", + "^Kazalo autora$", + "(?i)^uvodna riječ$", + "^Motivace jako způsob vedení lidí$", + "^Motivation as a leadership$", + "^Polyfunkční dům$", + "^Multi\\-funkcional building$", + "^Podnikatelský plán$", + "(?i)^Podnikatelský záměr$", + "(?i)^Business Plan$", + "^Oceňování nemovitostí$", + "^Marketingová komunikace$", + "^Marketing communication$", + "^Sumario Analítico$", + "^Riječ uredništva$", + "^Savjetovanja i priredbe$", + "^Índice$", + "^(Starobosanski nadpisi).*$", + "^Vzdělávání pracovníků v organizaci$", + "^Staff training in organization$", + "^(Life Histories of North American Geometridae).*$", + "^Strategická analýza podniku$", + "^Strategic Analysis of an Enterprise$", + "^Sadržaj$", + "^Upute suradnicima$", + "^Rodinný dům$", + "(?i)^Fami(l)?ly house$", + "^Upute autorima$", + "^Strategic Analysis$", + "^Finanční analýza vybraného podniku$", + "^Finanční analýza$", + "^Riječ urednika$", + "(?i)^Content(s?)$", + "(?i)^Inhalt$", + "^Jinjō shōgaku shūshinsho jidōyō$", + "(?i)^Index$", + "^Chūgaku kokubun kyōkasho$", + "^Retrato de una mujer$", + "^Retrato de un hombre$", + "^Kōtō shōgaku dokuhon$", + "^Shotōka kokugo$", + "^Shōgaku dokuhon$", + "^Jinjō shōgaku kokugo dokuhon$", + "^Shinsei kokugo dokuhon$", + "^Teikoku dokuhon$", + "^Instructions to Authors$", + "^KİTAP TAHLİLİ$", + "^PRZEGLĄD PIŚMIENNICTWA$", + "(?i)^Presentación$", + "^İçindekiler$", 
+ "(?i)^Tabl?e of contents$", + "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$", + "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*", + "^Editorial( Board)?$", + "(?i)^Editorial \\(English\\)$", + "^Editörden$", + "^(Corpus Oral Dialectal \\(COD\\)\\.).*$", + "^(Kiri Karl Morgensternile).*$", + "^(\\[Eksliibris Aleksandr).*\\]$", + "^(\\[Eksliibris Aleksandr).*$", + "^(Eksliibris Aleksandr).*$", + "^(Kiri A\\. de Vignolles).*$", + "^(2 kirja Karl Morgensternile).*$", + "^(Pirita kloostri idaosa arheoloogilised).*$", + "^(Kiri tundmatule).*$", + "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$", + "^(Eksliibris Nikolai Birukovile).*$", + "^(Eksliibris Nikolai Issakovile).*$", + "^(WHP Cruise Summary Information of section).*$", + "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$", + "^(Measurement of the spin\\-dependent structure function).*", + "(?i)^.*authors['’′]? reply\\.?$", + "(?i)^.*authors['’′]? 
response\\.?$", + "^Data [mM]anagement [sS]ervices\\.$", + "Research and Advanced Technology for Digital Libraries" + ] + }, + "synonyms": {} + } +} \ No newline at end of file From f5b0a6f89c9e393c566308f7efc76f222f03dafa Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 25 Feb 2022 10:21:57 +0100 Subject: [PATCH 100/162] [master to beta] fixed issues in test files --- .../crossref/publication_license_embargo.json | 2 +- .../alreadyLinked/alreadyLinked_20.json | 20 ++++++++++++++++++ .../alreadyLinked/alreadyLinked_20.json.gz | Bin 3160 -> 0 bytes .../alreadyLinked/alreadyLinked_20.json | 20 ++++++++++++++++++ .../alreadyLinked/alreadyLinked_20.json.gz | Bin 3175 -> 0 bytes .../alreadyLinked/alreadyLinked_20.json | 20 ++++++++++++++++++ .../alreadyLinked/alreadyLinked_20.json.gz | Bin 3160 -> 0 bytes 7 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz diff --git 
a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json index 47ca55f34..788946fea 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json @@ -1456,7 +1456,7 @@ "issued": { "date-parts": [ [ -2021, +3021, 2, 22 ] diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json new file mode 100644 index 000000000..ee737ce26 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json @@ -0,0 +1,20 @@ +{"key":"50|acm_________::3133635707788d2180bcef09e01a903c","valueSet":["20|dedup_wf_001::5ab59ffa94c31a140d4a56c594ea5865"]} +{"key":"50|core________::0308a76f6f8bc4db75a817d53a7e76a4","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} 
+{"key":"50|core________::04c8f896aef9e54867f2bf4236e9c810","valueSet":["20|snsf________::1496b1b4fc4d5509b16f2c217be480dc","20|dedup_wf_001::06731b587a9ea654103a6b0ebcb234ff","20|nih_________::c5722b087a5e707a50aa8f9f2ebf785d","20|snsf________::71d0a944b61b1a94068595f840005a2f","20|nih_________::dd3428794aef214a3bc2cad6dd548ba6","20|rcuk________::45aac2108e54b6503d1e611aa5872c03","20|nih_________::e1d47fdb7bba9eaeed82a95c578d6e90","20|dedup_wf_001::e3b52200d2fd4ff883478f5bef312efe","20|snsf________::c5c565d3422a7eb22886f3a4c93c32ea","20|nih_________::91154321f75ba26021efa18f7eeaa541","20|wt__________::38013971ca0c021fd65abce2171b82eb","20|wt__________::a6114989a56a1dfae6cbb201d14823f0","20|snsf________::b7af2f99e1e06750a4664ae401802734","20|wt__________::757c54e33d4e925c8c17edf032cdfacc","20|wt__________::1d57a87af1bbc2b7e211305fc747c9ad","20|wt__________::7cbb8c06f702b8871948acd370df892f","20|dedup_wf_001::53a8606f32787c4b3c663fd90ee97b17","20|wt__________::8473a929b793e56d2299a1a5aa08f617","20|nih_________::5e0fc2ef31bc189207f250db818fea0e","20|nih_________::1cd08fd26ef03fd2f51e9aeb34ed9486","20|nih_________::1c270e0dd2552b4e3cf295cdb7db1cc9","20|wt__________::04abd842647bdbc751b1eebe2f142013","20|nsf_________::3eaa8be3f16b0f0d7563b9117cd1f660","20|dedup_wf_001::c1b81dadf1e4cbf23a61833ff9ae8a31","20|nih_________::3716e1c14ab7ca14161278c9bbb8bdbb","20|dedup_wf_001::b7b403a764ea4e3acb12d999675aa73c","20|nih_________::300049f12fa0f5bc37db3a5636869743","20|wt__________::ed1e2be693353d370066fddbf862f23a","20|nsf_________::72a3747a18c56f3701494a0c0eadc5c9","20|rcuk________::e8877abcab4bc187339a242aa5bc2e09","20|microsoft___::119a535bfd240d7560fe4efec416bcd2","20|wt__________::be4e939abf9617557a35862e255493da","20|dedup_wf_001::3f6f17996747467f6047dfe019c8e4c7","20|snsf________::1f4e34433767faf965f33849bb0f7fb1","20|dedup_wf_001::9f1647eae28911113d1dcbe49c42275b","20|opendoar____::6835febcf408fe892504bdfd2ebe669c","20|dedup_wf_001::528bd21573101493c6c7d120b17a67e9"]} 
+{"key":"50|core________::061dc912098a6a52e85a824161bd32a2","valueSet":["20|dedup_wf_001::67f983a98e2c0cc0121e2db46d2bd00a","20|wt__________::59b5d99b2dde58df1655e0e5bb236c0a","20|wt__________::e84b06dbc1b26f413791c1304ca8d6a3","20|dedup_wf_001::7b118dab509f49b4fbd6dccfdbc479af","20|wt__________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::43255cf9c16732bc4ec1d5f580f44928","20|gsrt________::455b984b47945e1fd04e92c9c0eeca04","20|dedup_wf_001::e712c08f721e8f167f93888f590314ea","20|rcuk________::8b0aee2a7026dc92d4c05683ae45c894","20|dedup_wf_001::3c19a02cea18f8eeb2034b6acc544b7e","20|wt__________::26a9d64d00b8e1005cb6bbad2b7364cf","20|rcuk________::e57ab9be7acd7b93ba34aafab1a5b96c","20|nih_________::ca09277064edbd89f71e1602d98b4dd8","20|gsrt________::7c309ee758e6c6e0dce43c67b9343e82","20|nsf_________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::c57387345f51a40ad2284089b120be3f"]} +{"key":"50|core________::088190cf9dc9632e8d9ba5f5f1de1303","valueSet":["20|dedup_wf_001::286621caef868bbdc52918699e3cdc79","20|dedup_wf_001::84707a912d45d666fef35c5cd77fc203","20|dedup_wf_001::30fd03f4977438b0471a1d4db6317e71","20|dedup_wf_001::83d9dc4a0eba8737819b87ce8e737a49","20|dedup_wf_001::aeb9e738d873acebdca52c9ccd9559bd","20|dedup_wf_001::427a8bfe68648f9e30cb47434144da68","20|nih_________::ffa0ea174845f1548e520a047cf53165"]} +{"key":"50|core________::117d295998199f498fa561e9c26e7ae3","valueSet":["20|dedup_wf_001::d9f82e62c3114dc7f42b9da9b7f6fc64"]} +{"key":"50|core________::168a8f2e4ffe9b0e7c6bc100f34f2de5","valueSet":["20|wt__________::63fe73f079c5ff4e925c6cfc1758a98b","20|dedup_wf_001::b3c2f5e700cee15ad9752ab961df5930"]} +{"key":"50|core________::16a3b520030d82ad16a30992b124e69d","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} 
+{"key":"50|core________::172e3da668d18f41ea4ccdf7f2f39e53","valueSet":["20|nih_________::126cbf4b13249e65098ddb4835f47456","20|aka_________::506e3d2f7507a66584b8b3430ade20cb","20|dedup_wf_001::4746df4ff8bbb7e991ad343ccff8bbc7","20|wt__________::aff5133ca9cf0b810cc331d498bac9b0","20|wt__________::0c3bf892603817e5eff6e4f08a530ea2"]} +{"key":"50|core________::19f2fc91fe1db2ad62db598aa9aa8ab3","valueSet":["20|dedup_wf_001::dbbd988f8d57a9d11286caefdf35acaa"]} +{"key":"50|core________::1dceb5a29cd42728e410474fe0fda191","valueSet":["20|wt__________::b1ef2f643c948a2ef49005f9145ed556","20|dedup_wf_001::866fa622e3c0ab6227cd462f40cdcac8","20|rcuk________::63ecf5736189d299fc3e043e14428b8d","20|nsf_________::fcf880eab7315e0a5f3937c5a16c04b0","20|dedup_wf_001::65862ec7d57f700a130dee916bea66de"]} +{"key":"50|core________::2580c0b59b7457f571acdc829d1765a3","valueSet":["20|doajarticles::0f6e2c32a27c307b06edf7862c591973","20|opendoar____::4f10fb61c457cf124e5917391baaa3c2"]} +{"key":"50|core________::2624b8248a9febdad9bc456d358b30ed","valueSet":["20|dedup_wf_001::fb4eba3cea53264bddd59a4ade9973b3","20|rcuk________::b00968d2100a4b62447841aef5bdff62"]} +{"key":"50|core________::26820a282ef54882f7a5be74767fc02b","valueSet":["20|rcuk________::8ad6d06f3b4d09dc67142c158c7cf5b9","20|rcuk________::01ad471b66687b1213ceb08b5d7aa6c2"]} +{"key":"50|core________::2a8de3e0bbcab49066aa9de4bbb89bfa","valueSet":["20|dedup_wf_001::2ea78875d19c8cea63f7e958e5204136","20|corda_______::6821a8e260b8b97f5fb5e80168329d5b","20|dedup_wf_001::9d0ba437d73b19f55b53c578ac970ea2"]} +{"key":"50|core________::2c7d139419d2895d3bf0112b50108f75","valueSet":["20|dedup_wf_001::96ada508ea5d85a1e516bf9799413906","20|dedup_wf_001::d0ea749da6988bcdb2f30d77c64e2f1e","20|wt__________::f1ba5bd552edf15db494dc3020f27470","20|nih_________::ceeae4f78a5666daf4c45acdbbedde99","20|wt__________::84ef588eeeb4ef77e45ccfbbf3aef69c","20|wt__________::8eef7e1370ea81c2aa3dbc239b2bf5d8"]} 
+{"key":"50|core________::2cf1f6282498fa37aeaf678f8c6e5843","valueSet":["20|snsf________::73999c828ca67fd2d006100a8369c1eb"]} +{"key":"50|core________::2dffff00500c2354b506814f6a1ec148","valueSet":["20|wt__________::c6d89e908582fddf3e4c658a458807c3","20|wt__________::e7b2c9f3d3f3f1503092bf1ba2b163db","20|gsrt________::ab510bb43d6c654ed3d37b9c5ed5c971","20|dedup_wf_001::179d0313fa7d5fb2bef5f312ecdd16fe","20|gsrt________::cbffb510b01e81cc055fe61105c86154","20|opendoar____::5d462d78d512c1184dd384ef2dc35b7e","20|dedup_wf_001::646f14555ea42b260499239a7231b285","20|wt__________::5d462d78d512c1184dd384ef2dc35b7e","20|nih_________::a32a254b024265db2e24a66291c7c1e0","20|dedup_wf_001::5490ec18da9721e2c8d974fb73c62467","20|dedup_wf_001::3bc91ed90f44d0908258e132659bc754"]} +{"key":"50|core________::3031a50bf5c80865af4841ab42aaf57e","valueSet":["20|nih_________::1b46e3665d8be2b524c285a27ca952b8","20|nsf_________::71450a4b98015592ee3f525a51584608","20|snsf________::fc921725875adb56f2275579b31f805c","20|aka_________::fa5b7357f86c71ea15734282054f1183","20|wt__________::18fdb5b42b22fdcc45e323eb4d20c91b","20|wt__________::71450a4b98015592ee3f525a51584608","20|dedup_wf_001::8aaf46d4e4919dc55b8a5cac7a15399f"]} +{"key":"50|core________::31116372ae189ee456fc06dfa0f6cf7a","valueSet":["20|aka_________::c5b9aa0a905f89c51221f9f4fda22b20","20|aka_________::d9d3242062a7a3c483a7926fdba17bb6","20|nih_________::ede5c9e31cfb37a397d6cfe1940d045e","20|wt__________::8adcc12ffee195ae46679e8cf332a364","20|wt__________::5e954c57b0ac7aaf3fc16deeaf442389","20|snsf________::ddd964d550bfc6e1ce18f83655ba6901","20|rcuk________::a705d2ee7bf0bd225264b4a5794795ce","20|nih_________::8adcc12ffee195ae46679e8cf332a364","20|microsoft___::53732c6c7bb9daf5953fdb61fc0cd5bd"]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz 
b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz deleted file mode 100644 index ee822e372a4a590bd303308b2cc6fd849bf1bbfd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3160 zcmV-e45#xSiwFo+9+zGK17U1(WnpA_OlfXwWn^D6FfM9yZ*BnHnaPeFwh=}5`3$YI zAgeg)53Lbd;910 z@zv|Uypr(CT=T~tzWHf?6GD9bI-BRdxAinMT1;Uk%Vp%AmgQFRtH1v14?n*+$awdD zeK|?MQcK`U!&=L$_}a1L7E-NYg>de} znzwKM_PaHC)E!zMu~xU#cbA~Lt1Pk2Z|P~x^9zsGe7ClOrMXOMG+nZr%IsN0+XEL) zqwUs3j6;SX;=>@Ob^Q!gGpZHkt2=pM_KA+h>OEzEH{d?;0; z=T@yRD>=oWRy-^pE}Hhf6=~!u;(;}JffmcbtnI#K`K~R`r!~*dR^tpnB6*1poKh$) z_qrh#CJ6n%2CzZHsc9Rk$u5GdHx)ygz@oCL-;A;y}dyT`wV8Nb+nH&os zL~k_dw#Fz@>a8=X7U#BAhP#9~V``#1-0tJmw(Qln8+apr2EZAS+IUb#uskoTymj&Ae1565pKlwE^(Yc=-NY)q|T zGHIXj_JX<8T49MwZFMBqQHt)_!&<$1thaqys%l-j*RCs$0$JYvP!E;FJO*b0*n2u5QaUt;yBOeT^KZn^STYr|4q~um7!=GbuEX`Vge4I=jRIy4&H!b^QF+MhaG0kMIGSQG2<@fIzIy_ z#%XiMm4%R>c$u zGtThBW(^aV&de^Ga7DlI+XBm#X*N#>JXN)uB!LSUjTL@f^ZlFg{@sUn`{NT1B4e%D zfld0GeqXrf9G39n2Bx7|x+jj@WnE_P98y(C!b)9yrwW8Rup4z6G(6EBk&W(LQ z#OQLpaY)>BB_-9$0%NnhRa>H`Vb|^PwhyfVo)EP(PzD#wI0pyr17?LMpVqwlVZEL2 z?ETX|rD@)%+D?qoV(4;6KB)-UE~Q_!F$=Jgm{G{n8t7ou86`jkR_f{x2z%jVoh7|l zu`H`$W#j8(f*V{8-87y71cX*&Wv268%5?Y6IHQ-qx6@t{W&;vu+TI>NR(W-*)YO}lXaXZch*!97olvax1u@)5H z++l3#5rOFF>>RbGaI4H#MS4J5B4A)xrx(RK z)hk0bo!kyBS&nhi%7qhVk$W~(a*J1O*RSV>_lSh@*7ZI~Jd|o>K0_f*oow$8x zzrS!OY%PR1#=lUWgkyylp|2&(%yqW#P;;MbiND2d^k_s!MOx3ljB6U9d^swu=wqW1 z5woBQSpXu#Cx@%!AL=I((ZmL!k}FY zYN?~OyGKmK%%3~_f^i{pD+0psR?Z&?KMCuf0%BmJ-K4qbg7c7WLLh|J_s|s(OUM)F z+2&0$_h1NU3yL4{bLf_KB~n|&)Fr=Wt@6pa<6Uzl4hKWzCXp8NzX0Q0|GlunK#xFE z9fViT9B~t)30APc(1_#U@V@tQ>c%Hc-tu~)qr_q1c! 
zUgBWmu@5)#vsHU^GlK*lIDElS;JV>b5&)UA;~t6X1R4k7UnjcfRoo-|_=;genQguv?q*PmqU^X~xnXPE0MH3-Yj?7$(@jea$lp_eV0&i*}Q8_*1PX7LDz`Nl5#*@cm-V z;wSOrr}8ZhVc3b3q9j`1l;x3T?8uBU`NtvRdzbA}(HG(g3jB4_dhv3ty@NYxgV4XH zkiN>9=cb%IE`Q)4%=+F)2zE#`C*V+8OuNw85mS3mmPR3ptsYv21}3Vw(tz46QQ82Z zUK4$FJp9(FTj?-f4}*8wj?F6+^p@nv@{y5EItg-_2zcJJ9B+#FF$vee5i7%^q&P!m zHZ7vzpI*Yp#8`fQ4u8H6E^2?;Dv-3}ATsBD-s*|9tq=@uYLJhp&Wc4KrN`Jvqk7OK zz&8JunqSCYDjVs}8N(+IBP3j|c8se=mzJ+D6c6q5d@}3^8{G|(q%cg_*@V853L)9J zuF{-gMKI8a9p}M824NJ#g1QgXqSR|3biCG@{XBd?=Aog^3<`Vp^I-cxZXx87{sVI~ zT4)IPPDhsfydxs+>rp4{30sSuoy`{{Cq#GX3UN0)xwo5tL3hp<1>WIeVx=r1dzNe& zElMS{+Z!R?QZVls2{4>x+2@rn*X|m311&elO(dj6_IxM6D zKHd*V7_b4aEfs~cCLSHbas8g*q5Hh>ic}eIpH{;4%z@58rkKRaDIuE&o-jzPjBgcN zSjPpO5?BLi!Ejgui}2M3J}=Th`OSiL+%0hotV`S^YTU9=gm5eD?33Ulp@>!75V^-H z;epDL>D{kiL@cuK=u0D{4-advPz!0Hr!WImzVg4(fhZ(vel8<9rPRLK*vgn`xqi8n zaRmv*_Hc@6P~M36TX-Kwt*zg{g^law)AyxL=wETH^B$*#t{|l$-kg!ukSDFK$h%LG yeEeWCAem~<P^y)YL5 diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json new file mode 100644 index 000000000..a3fcffe92 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json @@ -0,0 +1,20 @@ +{"key":"50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523","valueSet":["20|dedup_wf_001::5ab59ffa94c31a140d4a56c594ea5865"]} +{"key":"50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} 
+{"key":"50|od______1582::6e7a9b21a2feef45673890432af34244","valueSet":["20|snsf________::1496b1b4fc4d5509b16f2c217be480dc","20|dedup_wf_001::06731b587a9ea654103a6b0ebcb234ff","20|nih_________::c5722b087a5e707a50aa8f9f2ebf785d","20|snsf________::71d0a944b61b1a94068595f840005a2f","20|nih_________::dd3428794aef214a3bc2cad6dd548ba6","20|rcuk________::45aac2108e54b6503d1e611aa5872c03","20|nih_________::e1d47fdb7bba9eaeed82a95c578d6e90","20|dedup_wf_001::e3b52200d2fd4ff883478f5bef312efe","20|snsf________::c5c565d3422a7eb22886f3a4c93c32ea","20|nih_________::91154321f75ba26021efa18f7eeaa541","20|wt__________::38013971ca0c021fd65abce2171b82eb","20|wt__________::a6114989a56a1dfae6cbb201d14823f0","20|snsf________::b7af2f99e1e06750a4664ae401802734","20|wt__________::757c54e33d4e925c8c17edf032cdfacc","20|wt__________::1d57a87af1bbc2b7e211305fc747c9ad","20|wt__________::7cbb8c06f702b8871948acd370df892f","20|dedup_wf_001::53a8606f32787c4b3c663fd90ee97b17","20|wt__________::8473a929b793e56d2299a1a5aa08f617","20|nih_________::5e0fc2ef31bc189207f250db818fea0e","20|nih_________::1cd08fd26ef03fd2f51e9aeb34ed9486","20|nih_________::1c270e0dd2552b4e3cf295cdb7db1cc9","20|wt__________::04abd842647bdbc751b1eebe2f142013","20|nsf_________::3eaa8be3f16b0f0d7563b9117cd1f660","20|dedup_wf_001::c1b81dadf1e4cbf23a61833ff9ae8a31","20|nih_________::3716e1c14ab7ca14161278c9bbb8bdbb","20|dedup_wf_001::b7b403a764ea4e3acb12d999675aa73c","20|nih_________::300049f12fa0f5bc37db3a5636869743","20|wt__________::ed1e2be693353d370066fddbf862f23a","20|nsf_________::72a3747a18c56f3701494a0c0eadc5c9","20|rcuk________::e8877abcab4bc187339a242aa5bc2e09","20|microsoft___::119a535bfd240d7560fe4efec416bcd2","20|wt__________::be4e939abf9617557a35862e255493da","20|dedup_wf_001::3f6f17996747467f6047dfe019c8e4c7","20|snsf________::1f4e34433767faf965f33849bb0f7fb1","20|dedup_wf_001::9f1647eae28911113d1dcbe49c42275b","20|opendoar____::6835febcf408fe892504bdfd2ebe669c","20|dedup_wf_001::528bd21573101493c6c7d120b17a67e9"]} 
+{"key":"50|core________::061dc912098a6a52e85a824161bd32a2","valueSet":["20|dedup_wf_001::67f983a98e2c0cc0121e2db46d2bd00a","20|wt__________::59b5d99b2dde58df1655e0e5bb236c0a","20|wt__________::e84b06dbc1b26f413791c1304ca8d6a3","20|dedup_wf_001::7b118dab509f49b4fbd6dccfdbc479af","20|wt__________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::43255cf9c16732bc4ec1d5f580f44928","20|gsrt________::455b984b47945e1fd04e92c9c0eeca04","20|dedup_wf_001::e712c08f721e8f167f93888f590314ea","20|rcuk________::8b0aee2a7026dc92d4c05683ae45c894","20|dedup_wf_001::3c19a02cea18f8eeb2034b6acc544b7e","20|wt__________::26a9d64d00b8e1005cb6bbad2b7364cf","20|rcuk________::e57ab9be7acd7b93ba34aafab1a5b96c","20|nih_________::ca09277064edbd89f71e1602d98b4dd8","20|gsrt________::7c309ee758e6c6e0dce43c67b9343e82","20|nsf_________::53c2c2895613ff9b759f64c24b6cb17c","20|nih_________::c57387345f51a40ad2284089b120be3f"]} +{"key":"50|core________::088190cf9dc9632e8d9ba5f5f1de1303","valueSet":["20|dedup_wf_001::286621caef868bbdc52918699e3cdc79","20|dedup_wf_001::84707a912d45d666fef35c5cd77fc203","20|dedup_wf_001::30fd03f4977438b0471a1d4db6317e71","20|dedup_wf_001::83d9dc4a0eba8737819b87ce8e737a49","20|dedup_wf_001::aeb9e738d873acebdca52c9ccd9559bd","20|dedup_wf_001::427a8bfe68648f9e30cb47434144da68","20|nih_________::ffa0ea174845f1548e520a047cf53165"]} +{"key":"50|od_______109::f375befa62a741e9250e55bcfa88f9a6","valueSet":["20|dedup_wf_001::d9f82e62c3114dc7f42b9da9b7f6fc64"]} +{"key":"50|core________::168a8f2e4ffe9b0e7c6bc100f34f2de5","valueSet":["20|wt__________::63fe73f079c5ff4e925c6cfc1758a98b","20|dedup_wf_001::b3c2f5e700cee15ad9752ab961df5930"]} +{"key":"50|core________::16a3b520030d82ad16a30992b124e69d","valueSet":["20|wt__________::a72760363ca885e6bef165804770e00c","20|nih_________::5c7f089c177ba49f92033f72e2aff724","20|dedup_wf_001::c88bf88e0a4dea271a3e2f832d952238"]} 
+{"key":"50|core________::172e3da668d18f41ea4ccdf7f2f39e53","valueSet":["20|nih_________::126cbf4b13249e65098ddb4835f47456","20|aka_________::506e3d2f7507a66584b8b3430ade20cb","20|dedup_wf_001::4746df4ff8bbb7e991ad343ccff8bbc7","20|wt__________::aff5133ca9cf0b810cc331d498bac9b0","20|wt__________::0c3bf892603817e5eff6e4f08a530ea2"]} +{"key":"50|core________::19f2fc91fe1db2ad62db598aa9aa8ab3","valueSet":["20|dedup_wf_001::dbbd988f8d57a9d11286caefdf35acaa"]} +{"key":"50|core________::1dceb5a29cd42728e410474fe0fda191","valueSet":["20|wt__________::b1ef2f643c948a2ef49005f9145ed556","20|dedup_wf_001::866fa622e3c0ab6227cd462f40cdcac8","20|rcuk________::63ecf5736189d299fc3e043e14428b8d","20|nsf_________::fcf880eab7315e0a5f3937c5a16c04b0","20|dedup_wf_001::65862ec7d57f700a130dee916bea66de"]} +{"key":"50|core________::2580c0b59b7457f571acdc829d1765a3","valueSet":["20|doajarticles::0f6e2c32a27c307b06edf7862c591973","20|opendoar____::4f10fb61c457cf124e5917391baaa3c2"]} +{"key":"50|core________::2624b8248a9febdad9bc456d358b30ed","valueSet":["20|dedup_wf_001::fb4eba3cea53264bddd59a4ade9973b3","20|rcuk________::b00968d2100a4b62447841aef5bdff62"]} +{"key":"50|core________::26820a282ef54882f7a5be74767fc02b","valueSet":["20|rcuk________::8ad6d06f3b4d09dc67142c158c7cf5b9","20|rcuk________::01ad471b66687b1213ceb08b5d7aa6c2"]} +{"key":"50|core________::2a8de3e0bbcab49066aa9de4bbb89bfa","valueSet":["20|dedup_wf_001::2ea78875d19c8cea63f7e958e5204136","20|corda_______::6821a8e260b8b97f5fb5e80168329d5b","20|dedup_wf_001::9d0ba437d73b19f55b53c578ac970ea2"]} +{"key":"50|core________::2c7d139419d2895d3bf0112b50108f75","valueSet":["20|dedup_wf_001::96ada508ea5d85a1e516bf9799413906","20|dedup_wf_001::d0ea749da6988bcdb2f30d77c64e2f1e","20|wt__________::f1ba5bd552edf15db494dc3020f27470","20|nih_________::ceeae4f78a5666daf4c45acdbbedde99","20|wt__________::84ef588eeeb4ef77e45ccfbbf3aef69c","20|wt__________::8eef7e1370ea81c2aa3dbc239b2bf5d8"]} 
+{"key":"50|core________::2cf1f6282498fa37aeaf678f8c6e5843","valueSet":["20|snsf________::73999c828ca67fd2d006100a8369c1eb"]} +{"key":"50|core________::2dffff00500c2354b506814f6a1ec148","valueSet":["20|wt__________::c6d89e908582fddf3e4c658a458807c3","20|wt__________::e7b2c9f3d3f3f1503092bf1ba2b163db","20|gsrt________::ab510bb43d6c654ed3d37b9c5ed5c971","20|dedup_wf_001::179d0313fa7d5fb2bef5f312ecdd16fe","20|gsrt________::cbffb510b01e81cc055fe61105c86154","20|opendoar____::5d462d78d512c1184dd384ef2dc35b7e","20|dedup_wf_001::646f14555ea42b260499239a7231b285","20|wt__________::5d462d78d512c1184dd384ef2dc35b7e","20|nih_________::a32a254b024265db2e24a66291c7c1e0","20|dedup_wf_001::5490ec18da9721e2c8d974fb73c62467","20|dedup_wf_001::3bc91ed90f44d0908258e132659bc754"]} +{"key":"50|core________::3031a50bf5c80865af4841ab42aaf57e","valueSet":["20|nih_________::1b46e3665d8be2b524c285a27ca952b8","20|nsf_________::71450a4b98015592ee3f525a51584608","20|snsf________::fc921725875adb56f2275579b31f805c","20|aka_________::fa5b7357f86c71ea15734282054f1183","20|wt__________::18fdb5b42b22fdcc45e323eb4d20c91b","20|wt__________::71450a4b98015592ee3f525a51584608","20|dedup_wf_001::8aaf46d4e4919dc55b8a5cac7a15399f"]} +{"key":"50|core________::31116372ae189ee456fc06dfa0f6cf7a","valueSet":["20|aka_________::c5b9aa0a905f89c51221f9f4fda22b20","20|aka_________::d9d3242062a7a3c483a7926fdba17bb6","20|nih_________::ede5c9e31cfb37a397d6cfe1940d045e","20|wt__________::8adcc12ffee195ae46679e8cf332a364","20|wt__________::5e954c57b0ac7aaf3fc16deeaf442389","20|snsf________::ddd964d550bfc6e1ce18f83655ba6901","20|rcuk________::a705d2ee7bf0bd225264b4a5794795ce","20|nih_________::8adcc12ffee195ae46679e8cf332a364","20|microsoft___::53732c6c7bb9daf5953fdb61fc0cd5bd"]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz 
b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz deleted file mode 100644 index d9b92debaa9c5abd597b9a2c8fb836b3e3ba3608..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3175 zcmV-t44CsDiwFqp5|~~917U1(WnpA_OlfXwWn^D6FfM9yZ*BnHnah$LHW7yRJcidS z)Kcr_H()jC@#HrI5YBggdSw{M<4{%L%8|Bvzc z{nLlv+vhiL|L`Wh{+K*w?(U=ON-{*l){)D~U2Wu6@|!>Y`#1mmuLqf>&ebKCkbDI( z)$DQR*4i93O8e$P{{H#hPyf7q%kNe`qNFui%T}hjl-z>W8Z3mx$FSzZ`#=9;Or1%((C$5kl)OfZ!}&v>*StLL;#}UEg`t}YRvF2nw-k5JF{XCA@Y8*d-kRw zgv`Bf`|Qo_TkSf`qv%L;#Wi->J1bBq&uc!eAHMr}jpi}9a%h&>D~GhjipT(ZHLj3u z`VJ4RW1euK{ z-PR0>q`h@Ub;$N%Wz89bY|$2zhueK-9Ui@R69Yz|Kue)l8l;6-liv2Jxvs31(pKv+ zug0j^_K@OY*R^hIWam0So67_NvvLzjA^WOYS0DSj565pS6t5x9)~a-EW9?MK_SV1R z?GS3F3W zOE^rZ9&^~U(N`P6Zfj(1K5)lk)QUeni}hhMsco)S?rY*c<5m*ECJ!P$a zyHC(Dchgw3&OKMnjH}s%&_sd5*ZKJw1_y6Iu}w?R>kJ-t%_U8MTh~TO%5{DgP?SBk zL+>a|gmt7rwa%s5NOHSRs!~iAOfc(0S)>S^T7A-L4m)rAFw_dA)q?s^fw88*%e{lw zF>1PsK!TIteu+K;BfS#!N(@FSt@NttRyTuZ;!X;m0+9d%r$DeR}fG&o~gqI^_v$ z^hL29>KkiHsS zt~U-5rz@%{RTd03%brqGdK$d?l-oYE26#f$v4JwUV9Q<{xG$I$o_t>O^u2x9PvhhB zKBcAXsG2KdWDH#nIyV&o+oklYHs%PdqGS~Ev<5m@bxRCT!6?=GP1x7dNBhMz6;9S! 
z(%UF_C~JANcz`=M>K-4oRFtMSXr>twwy;E~K-^QIo4G?6w+7}uGeDnlZ_8~T>jUpt zhSgJ9_chjZhQip(jKx(_?TlLrii6r^q+5|7Mrj+*7rN6n^@%{Qg`6&}efafk2_dI> zxyYILSuu=7S46Ry&w3q7L@?*&STh_;K|`1u!Wa-N+#aS+Xh z5QZZk1jfp@k@1 zzwVP3QyJp2qvj2Y#c6VXBrxNWs@>`q%jml(EuYN<?p6D(>9_^_GK8=-?}9>L0x*Sirs4>~b-+>pw~5)RNDoL;0tSY4 z&yuiC^~#VfZElB_*u*%c%7rJE#+vaLa?NcGW}@@I+h)O7#)Tj(W@Qm$Ad`JtqY+~^ z-4I440ayx)Vbi3dx<_fZ1`-VsIH}s`A?6mR9s|Qzm**t+eE2mky_3-SgjA2w9)*ch zIW`hrL;swdhkqy!HgI>x4VOqv6{TaYarR9RL%Dnv{dFWsX#}5W1OV%Vrn=Bm=91Lr z5!!t)sA4iHyocIXc7OK8WjW3`H%R~PYZx6r@hdDd$z%7LBZH3MKvgL`a`asdv>i_n4j_S7wZd+uMk>IgZxPDxR_5ixti z=jrhmKtKQJ#K(8zmlqC&ts^0h@h_A|ajftn^ktEmxy}|IYWvBS_*>jYmqBz)NbC78 z>zYO=UzS8G`f@ZvFbgR`7Jz8rlf%{VuQX32qKOSc$>v@_bZB_S^CI*>VTt~Z&2fVC z1W5&Nl5sqvlMBXtlu&W-2n=F);$Gh(Fzlh%4M&bO#sddIbY>p0uMIOrOs9ZD#e(`C z(&a%&lX$pLq1Gof64bP#wcZ62vGdmszhYd-96|zNc&o_|gdfHFYd{RfYBy;v$>2Ps zn-I9kjlPlv#1itvdA5C%OkWHEZ9(w`KZkCmo`}>#Fm*bwS*v_;?s(T+iNj$aa+^qt z`8UA0?biz%8R!*g>IC7HGYf8FwMdt0$>YCt_-XuQeEj@={nb7JHIN0}VtkKVsCZ4H zf8}s1cj-0VjJvu7pG6iMFF)MG@7C01jxiv?2M%8_6u54I29$p?gd_x^0Lgh$h zgF>)vM!>?Dt`O%p5C6)co@cZ3w1IyMSc6iD^GkADM|S5mY6b`gvoY~04VNE#UtYr3##nxR4u84NmoJviR)M4~i^!b)ywwwHTOkKNA~Gq!wvp?GMYmyqtgGdy&kS6-1Sz$+GhvmkM1$ zN`u^-(WoJBT3wO%xns=759$DtT6Lbd;910 z@zv|Uypr(CT=T~tzWHf?6GD9bI-BRdxAinMT1;Uk%Vp%AmgQFRtH1v14?n*+$awdD zeK|?MQcK`U!&=L$_}a1L7E-NYg>de} znzwKM_PaHC)E!zMu~xU#cbA~Lt1Pk2Z|P~x^9zsGe7ClOrMXOMG+nZr%IsN0+XEL) zqwUs3j6;SX;=>@Ob^Q!gGpZHkt2=pM_KA+h>OEzEH{d?;0; z=T@yRD>=oWRy-^pE}Hhf6=~!u;(;}JffmcbtnI#K`K~R`r!~*dR^tpnB6*1poKh$) z_qrh#CJ6n%2CzZHsc9Rk$u5GdHx)ygz@oCL-;A;y}dyT`wV8Nb+nH&os zL~k_dw#Fz@>a8=X7U#BAhP#9~V``#1-0tJmw(Qln8+apr2EZAS+IUb#uskoTymj&Ae1565pKlwE^(Yc=-NY)q|T zGHIXj_JX<8T49MwZFMBqQHt)_!&<$1thaqys%l-j*RCs$0$JYvP!E;FJO*b0*n2u5QaUt;yBOeT^KZn^STYr|4q~um7!=GbuEX`Vge4I=jRIy4&H!b^QF+MhaG0kMIGSQG2<@fIzIy_ z#%XiMm4%R>c$u zGtThBW(^aV&de^Ga7DlI+XBm#X*N#>JXN)uB!LSUjTL@f^ZlFg{@sUn`{NT1B4e%D zfld0GeqXrf9G39n2Bx7|x+jj@WnE_P98y(C!b)9yrwW8Rup4z6G(6EBk&W(LQ 
z#OQLpaY)>BB_-9$0%NnhRa>H`Vb|^PwhyfVo)EP(PzD#wI0pyr17?LMpVqwlVZEL2 z?ETX|rD@)%+D?qoV(4;6KB)-UE~Q_!F$=Jgm{G{n8t7ou86`jkR_f{x2z%jVoh7|l zu`H`$W#j8(f*V{8-87y71cX*&Wv268%5?Y6IHQ-qx6@t{W&;vu+TI>NR(W-*)YO}lXaXZch*!97olvax1u@)5H z++l3#5rOFF>>RbGaI4H#MS4J5B4A)xrx(RK z)hk0bo!kyBS&nhi%7qhVk$W~(a*J1O*RSV>_lSh@*7ZI~Jd|o>K0_f*oow$8x zzrS!OY%PR1#=lUWgkyylp|2&(%yqW#P;;MbiND2d^k_s!MOx3ljB6U9d^swu=wqW1 z5woBQSpXu#Cx@%!AL=I((ZmL!k}FY zYN?~OyGKmK%%3~_f^i{pD+0psR?Z&?KMCuf0%BmJ-K4qbg7c7WLLh|J_s|s(OUM)F z+2&0$_h1NU3yL4{bLf_KB~n|&)Fr=Wt@6pa<6Uzl4hKWzCXp8NzX0Q0|GlunK#xFE z9fViT9B~t)30APc(1_#U@V@tQ>c%Hc-tu~)qr_q1c! zUgBWmu@5)#vsHU^GlK*lIDElS;JV>b5&)UA;~t6X1R4k7UnjcfRoo-|_=;genQguv?q*PmqU^X~xnXPE0MH3-Yj?7$(@jea$lp_eV0&i*}Q8_*1PX7LDz`Nl5#*@cm-V z;wSOrr}8ZhVc3b3q9j`1l;x3T?8uBU`NtvRdzbA}(HG(g3jB4_dhv3ty@NYxgV4XH zkiN>9=cb%IE`Q)4%=+F)2zE#`C*V+8OuNw85mS3mmPR3ptsYv21}3Vw(tz46QQ82Z zUK4$FJp9(FTj?-f4}*8wj?F6+^p@nv@{y5EItg-_2zcJJ9B+#FF$vee5i7%^q&P!m zHZ7vzpI*Yp#8`fQ4u8H6E^2?;Dv-3}ATsBD-s*|9tq=@uYLJhp&Wc4KrN`Jvqk7OK zz&8JunqSCYDjVs}8N(+IBP3j|c8se=mzJ+D6c6q5d@}3^8{G|(q%cg_*@V853L)9J zuF{-gMKI8a9p}M824NJ#g1QgXqSR|3biCG@{XBd?=Aog^3<`Vp^I-cxZXx87{sVI~ zT4)IPPDhsfydxs+>rp4{30sSuoy`{{Cq#GX3UN0)xwo5tL3hp<1>WIeVx=r1dzNe& zElMS{+Z!R?QZVls2{4>x+2@rn*X|m311&elO(dj6_IxM6D zKHd*V7_b4aEfs~cCLSHbas8g*q5Hh>ic}eIpH{;4%z@58rkKRaDIuE&o-jzPjBgcN zSjPpO5?BLi!Ejgui}2M3J}=Th`OSiL+%0hotV`S^YTU9=gm5eD?33Ulp@>!75V^-H z;epDL>D{kiL@cuK=u0D{4-advPz!0Hr!WImzVg4(fhZ(vel8<9rPRLK*vgn`xqi8n zaRmv*_Hc@6P~M36TX-Kwt*zg{g^law)AyxL=wETH^B$*#t{|l$-kg!ukSDFK$h%LG yeEeWCAem~<P^y)YL5 From efeeebfee17d1e807d90e38cc9228ee577b180de Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Wed, 2 Mar 2022 13:29:25 +0200 Subject: [PATCH 101/162] fixed query after the change in the indicator table --- .../oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql index 6b4d9b1b0..7ad6f3888 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql @@ -17,6 +17,6 @@ left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; -- GOLD OA: create table ${stats_db_name}.result_gold as -select r.id, case when gold.gold_oa=1 then true else false end as gold +select r.id, case when gold.is_gold=1 then true else false end as gold from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; \ No newline at end of file From 3be8737c323c26ef93e0d57f200a9363c0d2e58a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Mar 2022 14:09:05 +0100 Subject: [PATCH 102/162] [graph-stats] fixed query after the change in the indicator table related to PR#200 --- .../oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql index 6b4d9b1b0..7ad6f3888 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql @@ -17,6 +17,6 @@ left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; -- GOLD OA: create table ${stats_db_name}.result_gold as -select r.id, case when gold.gold_oa=1 then true else false end as gold +select r.id, case 
when gold.is_gold=1 then true else false end as gold from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; \ No newline at end of file From ad78e505da16fbbe8384976eb70ce48ee0f2314f Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 3 Mar 2022 12:28:12 +0200 Subject: [PATCH 103/162] yet another fix --- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index b4964d2b3..7c5257edd 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -157,13 +157,13 @@ create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_hybrid_oa_with_cc; -create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); compute stats TARGET.indi_pub_downloads; -create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create table TARGET.indi_pub_downloads_datasource 
stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); compute stats TARGET.indi_pub_downloads_datasource; -create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); compute stats TARGET.indi_pub_downloads_year; -create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); compute stats TARGET.indi_pub_downloads_datasource_year; --denorm From 44b0c03080f8919884f993c659746be14f35ec84 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Mar 2022 10:37:59 +0100 Subject: [PATCH 104/162] [HostedByMap] update to download the json instead of the csv --- .../oa/graph/hostedbymap/model/doaj/APC.java | 35 +++ .../graph/hostedbymap/model/doaj/Admin.java | 25 ++ .../graph/hostedbymap/model/doaj/Article.java | 44 +++ .../graph/hostedbymap/model/doaj/BibJson.java | 253 ++++++++++++++++++ .../hostedbymap/model/doaj/Copyright.java | 25 ++ .../hostedbymap/model/doaj/DOAJEntry.java | 52 ++++ .../hostedbymap/model/doaj/DepositPolicy.java | 35 +++ .../hostedbymap/model/doaj/Editorial.java | 35 +++ .../hostedbymap/model/doaj/Institution.java | 25 ++ .../graph/hostedbymap/model/doaj/License.java | 67 +++++ .../oa/graph/hostedbymap/model/doaj/Max.java | 25 ++ .../hostedbymap/model/doaj/OtherCharges.java | 25 ++ 
.../hostedbymap/model/doaj/PidScheme.java | 26 ++ .../hostedbymap/model/doaj/Plagiarism.java | 27 ++ .../hostedbymap/model/doaj/Preservation.java | 44 +++ .../hostedbymap/model/doaj/Publisher.java | 25 ++ .../oa/graph/hostedbymap/model/doaj/Ref.java | 52 ++++ .../graph/hostedbymap/model/doaj/Subject.java | 34 +++ .../graph/hostedbymap/model/doaj/Waiver.java | 25 ++ 19 files changed, 879 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java create mode 100644 
dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java new file mode 100644 index 000000000..3473e0f9d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class APC implements Serializable { + private Boolean has_apc; + private String url; + private List max; + + public List getMax() { + return max; + } + + public void setMax(List max) { + this.max = max; + } + + public Boolean getHas_apc() { + return has_apc; + } + + public void setHas_apc(Boolean has_apc) { + this.has_apc = has_apc; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java new file mode 100644 index 000000000..b823e3450 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Admin implements Serializable { + private Boolean ticked; + private Boolean seal; + + public Boolean getTicked() { + return ticked; + } + + public void setTicked(Boolean ticked) { + this.ticked = ticked; + } + + public Boolean getSeal() { + return seal; + } + + public void setSeal(Boolean seal) { + this.seal = seal; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java new file mode 100644 index 000000000..7e90e24a2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class Article implements Serializable { + private String license_display_example_url; + private List license_display; + private Boolean orcid; + private Boolean i4oc_open_citations; + + public String getLicense_display_example_url() { + return license_display_example_url; + } + + public void setLicense_display_example_url(String license_display_example_url) { + this.license_display_example_url = license_display_example_url; + } + + public List getLicense_display() { + return license_display; + } + + public void setLicense_display(List license_display) { + this.license_display = license_display; + } + + public Boolean getOrcid() { + return orcid; + } + + public void setOrcid(Boolean orcid) { + this.orcid = orcid; + } + + 
public Boolean getI4oc_open_citations() { + return i4oc_open_citations; + } + + public void setI4oc_open_citations(Boolean i4oc_open_citations) { + this.i4oc_open_citations = i4oc_open_citations; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java new file mode 100644 index 000000000..f7c0d501f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java @@ -0,0 +1,253 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +public class BibJson implements Serializable { + private Editorial editorial; + private PidScheme pid_scheme; + private Copyright copyright; + private List keywords; + private Plagiarism plagiarism; + private List subject; + private String eissn; + private String pissn; + private List language; + private String title; + private Article article; + private Institution institution; + private Preservation preservation; + private List license; + private Ref ref; + private Integer oa_start; + private APC apc; + private OtherCharges other_charges; + private Integer publication_time_weeks; + private DepositPolicy deposit_policy; + private Publisher publisher; + private Boolean boai; + private Waiver waiver; + private String alternative_title; + private List is_replaced_by; + private List replaces; + private String discontinued_date; + + public String getDiscontinued_date() { + return discontinued_date; + } + + public void setDiscontinued_date(String discontinued_date) { + this.discontinued_date = discontinued_date; + } + + public List getReplaces() { + return replaces; + } + + public void setReplaces(List replaces) { + this.replaces = replaces; + } + + public List getIs_replaced_by() { + return 
is_replaced_by; + } + + public void setIs_replaced_by(List is_replaced_by) { + this.is_replaced_by = is_replaced_by; + } + + public String getAlternative_title() { + return alternative_title; + } + + public void setAlternative_title(String alternative_title) { + this.alternative_title = alternative_title; + } + + public String getPissn() { + return pissn; + } + + public void setPissn(String pissn) { + this.pissn = pissn; + } + + public Editorial getEditorial() { + return editorial; + } + + public void setEditorial(Editorial editorial) { + this.editorial = editorial; + } + + public PidScheme getPid_scheme() { + return pid_scheme; + } + + public void setPid_scheme(PidScheme pid_scheme) { + this.pid_scheme = pid_scheme; + } + + public Copyright getCopyright() { + return copyright; + } + + public void setCopyright(Copyright copyright) { + this.copyright = copyright; + } + + public List getKeywords() { + return keywords; + } + + public void setKeywords(List keywords) { + this.keywords = keywords; + } + + public Plagiarism getPlagiarism() { + return plagiarism; + } + + public void setPlagiarism(Plagiarism plagiarism) { + this.plagiarism = plagiarism; + } + + public List getSubject() { + return subject; + } + + public void setSubject(List subject) { + this.subject = subject; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public List getLanguage() { + return language; + } + + public void setLanguage(List language) { + this.language = language; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public Article getArticle() { + return article; + } + + public void setArticle(Article article) { + this.article = article; + } + + public Institution getInstitution() { + return institution; + } + + public void setInstitution(Institution institution) { + this.institution = institution; + } + + public Preservation getPreservation() 
{ + return preservation; + } + + public void setPreservation(Preservation preservation) { + this.preservation = preservation; + } + + public List getLicense() { + return license; + } + + public void setLicense(List license) { + this.license = license; + } + + public Ref getRef() { + return ref; + } + + public void setRef(Ref ref) { + this.ref = ref; + } + + public Integer getOa_start() { + return oa_start; + } + + public void setOa_start(Integer oa_start) { + this.oa_start = oa_start; + } + + public APC getApc() { + return apc; + } + + public void setApc(APC apc) { + this.apc = apc; + } + + public OtherCharges getOther_charges() { + return other_charges; + } + + public void setOther_charges(OtherCharges other_charges) { + this.other_charges = other_charges; + } + + public Integer getPublication_time_weeks() { + return publication_time_weeks; + } + + public void setPublication_time_weeks(Integer publication_time_weeks) { + this.publication_time_weeks = publication_time_weeks; + } + + public DepositPolicy getDeposit_policy() { + return deposit_policy; + } + + public void setDeposit_policy(DepositPolicy deposit_policy) { + this.deposit_policy = deposit_policy; + } + + public Publisher getPublisher() { + return publisher; + } + + public void setPublisher(Publisher publisher) { + this.publisher = publisher; + } + + public Boolean getBoai() { + return boai; + } + + public void setBoai(Boolean boai) { + this.boai = boai; + } + + public Waiver getWaiver() { + return waiver; + } + + public void setWaiver(Waiver waiver) { + this.waiver = waiver; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java new file mode 100644 index 000000000..c595c4c88 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java @@ -0,0 +1,25 @@ + 
+package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Copyright implements Serializable { + private Boolean author_retains; + private String url; + + public Boolean getAuthor_retains() { + return author_retains; + } + + public void setAuthor_retains(Boolean author_retains) { + this.author_retains = author_retains; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java new file mode 100644 index 000000000..add5bf8bf --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class DOAJEntry implements Serializable { + private String last_updated; + private BibJson bibjson; + private Admin admin; + private String created_date; + private String id; + + public String getLast_updated() { + return last_updated; + } + + public void setLast_updated(String last_updated) { + this.last_updated = last_updated; + } + + public BibJson getBibjson() { + return bibjson; + } + + public void setBibjson(BibJson bibjson) { + this.bibjson = bibjson; + } + + public Admin getAdmin() { + return admin; + } + + public void setAdmin(Admin admin) { + this.admin = admin; + } + + public String getCreated_date() { + return created_date; + } + + public void setCreated_date(String created_date) { + this.created_date = created_date; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java new file mode 100644 index 000000000..d86c97f34 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class DepositPolicy implements Serializable { + private List service; + private String url; + private Boolean has_policy; + + public List getService() { + return service; + } + + public void setService(List service) { + this.service = service; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_policy() { + return has_policy; + } + + public void setHas_policy(Boolean has_policy) { + this.has_policy = has_policy; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java new file mode 100644 index 000000000..35bfba158 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class Editorial implements Serializable { + private List review_process; + private String review_url; + private String board_url; + + public List getReview_process() { + return review_process; + } + + public void setReview_process(List review_process) { + this.review_process = review_process; + } + + public String getReview_url() { + return review_url; + } + + public void setReview_url(String review_url) { + this.review_url = review_url; + } + + public String getBoard_url() { + return board_url; + } + + 
public void setBoard_url(String board_url) { + this.board_url = board_url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java new file mode 100644 index 000000000..3b4d90493 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Institution implements Serializable { + private String country; + private String name; + + public String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = country; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java new file mode 100644 index 000000000..64c7bc18f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java @@ -0,0 +1,67 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class License implements Serializable { + private Boolean nc; + private Boolean nd; + private Boolean by; + private String type; + private Boolean sa; + private String url; + + public Boolean getnC() { + return nc; + } + + @JsonProperty("NC") + public void setnC(Boolean NC) { + this.nc = NC; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public Boolean getNd() { + return nd; + } + + @JsonProperty("ND") 
+ public void setNd(Boolean nd) { + this.nd = nd; + } + + public Boolean getBy() { + return by; + } + + @JsonProperty("BY") + public void setBy(Boolean by) { + this.by = by; + } + + public Boolean getSa() { + return sa; + } + + @JsonProperty("SA") + public void setSa(Boolean sa) { + this.sa = sa; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java new file mode 100644 index 000000000..0e292b631 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Max implements Serializable { + private Integer price; + private String currency; + + public Integer getPrice() { + return price; + } + + public void setPrice(Integer price) { + this.price = price; + } + + public String getCurrency() { + return currency; + } + + public void setCurrency(String currency) { + this.currency = currency; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java new file mode 100644 index 000000000..1583481d2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class OtherCharges implements Serializable { + private Boolean has_other_charges; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; 
+ } + + public Boolean getHas_other_charges() { + return has_other_charges; + } + + public void setHas_other_charges(Boolean has_other_charges) { + this.has_other_charges = has_other_charges; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java new file mode 100644 index 000000000..bd7e710dd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java @@ -0,0 +1,26 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class PidScheme implements Serializable { + private List scheme; + private Boolean has_pid_scheme; + + public List getScheme() { + return scheme; + } + + public void setScheme(List scheme) { + this.scheme = scheme; + } + + public Boolean getHas_pid_scheme() { + return has_pid_scheme; + } + + public void setHas_pid_scheme(Boolean has_pid_scheme) { + this.has_pid_scheme = has_pid_scheme; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java new file mode 100644 index 000000000..f2230b3ea --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java @@ -0,0 +1,27 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +import javax.sql.rowset.serial.SerialArray; + +public class Plagiarism implements Serializable { + private Boolean detection; + private String url; + + public Boolean getDetection() { + return detection; + } + + public void setDetection(Boolean detection) { + this.detection = detection; + } + + public String getUrl() { + return url; + } + 
+ public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java new file mode 100644 index 000000000..f56ea9953 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class Preservation implements Serializable { + private Boolean has_preservation; + private List service; + private List national_library; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_preservation() { + return has_preservation; + } + + public void setHas_preservation(Boolean has_preservation) { + this.has_preservation = has_preservation; + } + + public List getService() { + return service; + } + + public void setService(List service) { + this.service = service; + } + + public List getNational_library() { + return national_library; + } + + public void setNational_library(List national_library) { + this.national_library = national_library; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java new file mode 100644 index 000000000..6d97a7969 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Publisher implements Serializable { + private String country; + private String name; + + public 
String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = country; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java new file mode 100644 index 000000000..0f7c7dc95 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Ref implements Serializable { + private String aims_scope; + private String journal; + private String oa_statement; + private String author_instructions; + private String license_terms; + + public String getAims_scope() { + return aims_scope; + } + + public void setAims_scope(String aims_scope) { + this.aims_scope = aims_scope; + } + + public String getJournal() { + return journal; + } + + public void setJournal(String journal) { + this.journal = journal; + } + + public String getOa_statement() { + return oa_statement; + } + + public void setOa_statement(String oa_statement) { + this.oa_statement = oa_statement; + } + + public String getAuthor_instructions() { + return author_instructions; + } + + public void setAuthor_instructions(String author_instructions) { + this.author_instructions = author_instructions; + } + + public String getLicense_terms() { + return license_terms; + } + + public void setLicense_terms(String license_terms) { + this.license_terms = license_terms; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java new file mode 100644 index 
000000000..811638e76 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java @@ -0,0 +1,34 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Subject implements Serializable { + private String code; + private String scheme; + private String term; + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getScheme() { + return scheme; + } + + public void setScheme(String scheme) { + this.scheme = scheme; + } + + public String getTerm() { + return term; + } + + public void setTerm(String term) { + this.term = term; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java new file mode 100644 index 000000000..ca67dde0c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Waiver implements Serializable { + private Boolean has_waiver; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_waiver() { + return has_waiver; + } + + public void setHas_waiver(Boolean has_waiver) { + this.has_waiver = has_waiver; + } +} From 8a41f63348a7d7f3ea25ed0bafaa75c3e66afdbe Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Mar 2022 10:38:43 +0100 Subject: [PATCH 105/162] [HostedByMap] update to download the json instead of the csv --- .../hostedbymap/ExtractAndMapDoajJson.java | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 
dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java new file mode 100644 index 000000000..17a78760e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java @@ -0,0 +1,117 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import static eu.dnetlib.dhp.common.collection.DecompressTarGz.doExtract; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Arrays; +import java.util.Objects; + +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj.DOAJEntry; + +public class ExtractAndMapDoajJson { + + private static final Logger log = LoggerFactory.getLogger(ExtractAndMapDoajJson.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + ExtractAndMapDoajJson.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_json_parameters.json")))); + + 
parser.parseArgument(args); + + final String compressedInput = parser.get("compressedFile"); + log.info("compressedInput {}", compressedInput); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fs = FileSystem.get(conf); + CompressionCodecFactory factory = new CompressionCodecFactory(conf); + CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec"); + doExtract(fs, workingPath, compressedInput); + doMap(fs, workingPath, outputPath, codec); + + } + + private static void doMap(FileSystem fs, String workingPath, String outputPath, CompressionCodec codec) + throws IOException { + RemoteIterator fileStatusListIterator = fs + .listFiles( + new Path(workingPath), true); + + Path hdfsWritePath = new Path(outputPath); + if (fs.exists(hdfsWritePath)) { + fs.delete(hdfsWritePath, true); + + } + try ( + + FSDataOutputStream out = fs + .create(hdfsWritePath); + PrintWriter writer = new PrintWriter(new BufferedOutputStream(out))) { + + while (fileStatusListIterator.hasNext()) { + Path path = fileStatusListIterator.next().getPath(); + if (!fs.isDirectory(path)) { + FSDataInputStream is = fs.open(path); + CompressionInputStream compressionInputStream = codec.createInputStream(is); + DOAJEntry[] doajEntries = new ObjectMapper().readValue(compressionInputStream, DOAJEntry[].class); + Arrays.stream(doajEntries).forEach(doaj -> { + try { + writer.println(new ObjectMapper().writeValueAsString(getDoajModel(doaj))); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + } + + } + + } + + } + + @NotNull + public static DOAJModel getDoajModel(DOAJEntry doaj) { + DOAJModel doajModel 
= new DOAJModel(); + doajModel.setOaStart(doaj.getBibjson().getOa_start()); + doajModel.setEissn(doaj.getBibjson().getEissn()); + doajModel.setIssn(doaj.getBibjson().getPissn()); + doajModel.setJournalTitle(doaj.getBibjson().getTitle()); + doajModel.setReviewProcess(doaj.getBibjson().getEditorial().getReview_process()); + return doajModel; + } + +} From b7c2340952d9af572b97d6d4f774efe89796019b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Mar 2022 11:05:23 +0100 Subject: [PATCH 106/162] [HostedByMap - DOIBoost] changed to use code moved to common since used also from hostedbymap now --- .../common/collection/DecompressTarGz.java | 40 +++++++++++++++++++ .../crossref/ExtractCrossrefRecords.java | 31 +++----------- 2 files changed, 45 insertions(+), 26 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java new file mode 100644 index 000000000..8bcf14ba4 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.dhp.common.collection; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.util.zip.GZIPOutputStream; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class DecompressTarGz { + + public static void doExtract(FileSystem fs, String outputPath, String tarGzPath) throws IOException { + + FSDataInputStream inputFileStream = 
fs.open(new Path(tarGzPath)); + try (TarArchiveInputStream tais = new TarArchiveInputStream( + new GzipCompressorInputStream(inputFileStream))) { + TarArchiveEntry entry = null; + while ((entry = tais.getNextTarEntry()) != null) { + if (!entry.isDirectory()) { + try ( + FSDataOutputStream out = fs + .create(new Path(outputPath.concat(entry.getName()).concat(".gz"))); + GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { + + IOUtils.copy(tais, gzipOs); + + } + + } + } + } + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index d1861ff0a..6060b619e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -1,19 +1,13 @@ package eu.dnetlib.doiboost.crossref; -import java.io.BufferedOutputStream; -import java.net.URI; -import java.util.zip.GZIPOutputStream; +import static eu.dnetlib.dhp.common.collection.DecompressTarGz.doExtract; + +import java.net.URI; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -33,31 +27,16 @@ public class ExtractCrossrefRecords { final String outputPath = parser.get("outputPath"); final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz"); - Path hdfsreadpath = new 
Path(workingPath.concat("/").concat(crossrefFileNameTarGz)); Configuration conf = new Configuration(); conf.set("fs.defaultFS", workingPath); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); FileSystem fs = FileSystem.get(URI.create(workingPath), conf); - FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); - try (TarArchiveInputStream tais = new TarArchiveInputStream( - new GzipCompressorInputStream(crossrefFileStream))) { - TarArchiveEntry entry = null; - while ((entry = tais.getNextTarEntry()) != null) { - if (!entry.isDirectory()) { - try ( - FSDataOutputStream out = fs - .create(new Path(outputPath.concat(entry.getName()).concat(".gz"))); - GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { - IOUtils.copy(tais, gzipOs); + doExtract(fs, outputPath, workingPath.concat("/").concat(crossrefFileNameTarGz)); - } - - } - } - } Log.info("Crossref dump reading completed"); } + } From 5d608d62911fb50c41b5bf15455395a5443e707a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Mar 2022 11:06:09 +0100 Subject: [PATCH 107/162] [HostedByMap] changed the model to include also oaStart date and review process that could be possibly used in the future --- .../oa/graph/hostedbymap/model/DOAJModel.java | 17 ++++- .../hostedbymap/SparkProduceHostedByMap.scala | 67 +++++++++++++------ 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java index 4b5dc22a6..c3b6f1f30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -2,6 +2,7 @@ 
package eu.dnetlib.dhp.oa.graph.hostedbymap.model; import java.io.Serializable; +import java.util.List; import com.opencsv.bean.CsvBindByName; @@ -17,7 +18,17 @@ public class DOAJModel implements Serializable { private String eissn; @CsvBindByName(column = "Review process") - private String reviewProcess; + private List reviewProcess; + + private Integer oaStart; + + public Integer getOaStart() { + return oaStart; + } + + public void setOaStart(Integer oaStart) { + this.oaStart = oaStart; + } public String getJournalTitle() { return journalTitle; @@ -43,11 +54,11 @@ public class DOAJModel implements Serializable { this.eissn = eissn; } - public String getReviewProcess() { + public List getReviewProcess() { return reviewProcess; } - public void setReviewProcess(String reviewProcess) { + public void setReviewProcess(List reviewProcess) { this.reviewProcess = reviewProcess; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 8d8965866..bdf0861ae 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -2,9 +2,10 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} import eu.dnetlib.dhp.schema.oaf.Datasource -import org.apache.commons.io.IOUtils +import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.io.compress.GzipCodec @@ -13,7 +14,8 @@ import 
org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} -import java.io.PrintWriter +import java.io.{File, PrintWriter} +import scala.collection.JavaConverters._ object SparkProduceHostedByMap { @@ -34,7 +36,9 @@ object SparkProduceHostedByMap { openaire.journal_id, "", "", - isOpenAccess + isOpenAccess, + -1, + List[String]() ) case Constants.EISSN => HostedByItemType( @@ -43,7 +47,9 @@ object SparkProduceHostedByMap { "", openaire.journal_id, "", - isOpenAccess + isOpenAccess, + -1, + List[String]() ) case Constants.ISSNL => HostedByItemType( @@ -52,7 +58,9 @@ object SparkProduceHostedByMap { "", "", openaire.journal_id, - isOpenAccess + isOpenAccess, + -1, + List[String]() ) // catch the default with a variable so you can print it @@ -77,34 +85,36 @@ object SparkProduceHostedByMap { issn: String, eissn: String, issnl: String, - oa: Boolean + oa: Boolean, + oaDate: Int, + reviewProcess: List[String] ): HostedByItemType = { if (issn != null) { if (eissn != null) { if (issnl != null) { - HostedByItemType(id, officialname, issn, eissn, issnl, oa) + HostedByItemType(id, officialname, issn, eissn, issnl, oa, oaDate, reviewProcess) } else { - HostedByItemType(id, officialname, issn, eissn, "", oa) + HostedByItemType(id, officialname, issn, eissn, "", oa, oaDate, reviewProcess) } } else { if (issnl != null) { - HostedByItemType(id, officialname, issn, "", issnl, oa) + HostedByItemType(id, officialname, issn, "", issnl, oa, oaDate, reviewProcess) } else { - HostedByItemType(id, officialname, issn, "", "", oa) + HostedByItemType(id, officialname, issn, "", "", oa, oaDate, reviewProcess) } } } else { if (eissn != null) { if (issnl != null) { - HostedByItemType(id, officialname, "", eissn, issnl, oa) + HostedByItemType(id, officialname, "", eissn, issnl, oa, oaDate, reviewProcess) } else { - HostedByItemType(id, officialname, "", eissn, "", oa) + HostedByItemType(id, officialname, "", eissn, 
"", oa, oaDate, reviewProcess) } } else { if (issnl != null) { - HostedByItemType(id, officialname, "", "", issnl, oa) + HostedByItemType(id, officialname, "", "", issnl, oa, oaDate, reviewProcess) } else { - HostedByItemType("", "", "", "", "", oa) + HostedByItemType("", "", "", "", "", oa, oaDate, reviewProcess) } } } @@ -119,10 +129,12 @@ object SparkProduceHostedByMap { dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, - false + false, + -1, + List[String]() ) } - HostedByItemType("", "", "", "", "", false) + HostedByItemType("", "", "", "", "", false, -1, List[String]()) } def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { @@ -148,7 +160,9 @@ object SparkProduceHostedByMap { gold.getIssn, "", gold.getIssnL, - true + true, + -1, + List[String]() ) } @@ -171,14 +185,27 @@ object SparkProduceHostedByMap { } def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = { - + if (doaj.getOaStart == null) { + return getHostedByItemType( + Constants.DOAJ, + doaj.getJournalTitle, + doaj.getIssn, + doaj.getEissn, + "", + true, + -1, + doaj.getReviewProcess.asScala.toList + ) + } return getHostedByItemType( Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", - true + true, + doaj.getOaStart, + doaj.getReviewProcess.asScala.toList ) } @@ -256,6 +283,8 @@ object SparkProduceHostedByMap { logger.info("Getting the Datasources") + HdfsSupport.remove(outputPath, spark.sparkContext.hadoopConfiguration) + Aggregators .explodeHostedByItemType( oaHostedByDataset(spark, datasourcePath) From 2c5087d55a53c745ac81db84edd995b9f6bdb6a7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Mar 2022 15:18:21 +0100 Subject: [PATCH 108/162] [HostedByMap] download of doaj from json, modification of test resources, deletion of class no more needed for the CSV download --- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 27 +---- .../oa/graph/hostedbymap/DownloadCSV2.java | 
84 --------------- .../hostedbymap/ExtractAndMapDoajJson.java | 2 +- .../graph/hostedbymap/oozie_app/workflow.xml | 101 ++++++++++++++---- .../hostedbymap/SparkProduceHostedByMap.scala | 50 +++------ .../oa/graph/hostedbymap/DownloadCsvTest.java | 51 --------- .../graph/hostedbymap/doaj_transformed.json | 50 ++++----- 7 files changed, 129 insertions(+), 236 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index dff761c34..bbadde524 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -23,7 +23,7 @@ public class DownloadCSV { private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class); - public static final char DEFAULT_DELIMITER = ';'; + public static final char DEFAULT_DELIMITER = ','; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -40,9 +40,6 @@ public class DownloadCSV { final String fileURL = parser.get("fileURL"); log.info("fileURL {}", fileURL); - final String workingPath = parser.get("workingPath"); - log.info("workingPath {}", workingPath); - final String outputFile = parser.get("outputFile"); log.info("outputFile {}", outputFile); @@ -63,31 +60,15 @@ public class DownloadCSV { FileSystem fileSystem = FileSystem.get(conf); - new DownloadCSV().doDownload(fileURL, workingPath, outputFile, classForName, delimiter, fileSystem); + new DownloadCSV().doDownload(fileURL, outputFile, classForName, delimiter, fileSystem); } - protected void doDownload(String fileURL, String workingPath, String outputFile, 
String classForName, + protected void doDownload(String fileURL, String outputFile, String classForName, char delimiter, FileSystem fs) throws IOException, ClassNotFoundException, CollectorException { - final HttpConnector2 connector2 = new HttpConnector2(); - - final Path path = new Path(workingPath + "/replaced.csv"); - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - - try (PrintWriter writer = new PrintWriter( - new OutputStreamWriter(fs.create(path, true), StandardCharsets.UTF_8))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - try (InputStreamReader reader = new InputStreamReader(fs.open(path))) { + try (InputStreamReader reader = new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) { GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java deleted file mode 100644 index d82d00862..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java +++ /dev/null @@ -1,84 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; -import java.util.Objects; -import java.util.Optional; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.GetCSV; -import eu.dnetlib.dhp.common.collection.HttpConnector2; - -public class DownloadCSV2 { - - private static final Logger log = LoggerFactory.getLogger(DownloadCSV2.class); - - public static final 
char DEFAULT_DELIMITER = ';'; - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - DownloadCSV2.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - log.info("fileURL {}", fileURL); - - final String tmpFile = parser.get("tmpFile"); - log.info("tmpFile {}", tmpFile); - - final String outputFile = parser.get("outputFile"); - log.info("outputFile {}", outputFile); - - final String hdfsNameNode = parser.get("hdfsNameNode"); - log.info("hdfsNameNode {}", hdfsNameNode); - - final String classForName = parser.get("classForName"); - log.info("classForName {}", classForName); - - final char delimiter = Optional - .ofNullable(parser.get("delimiter")) - .map(s -> s.charAt(0)) - .orElse(DEFAULT_DELIMITER); - log.info("delimiter {}", delimiter); - - HttpConnector2 connector2 = new HttpConnector2(); - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(tmpFile)))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - try (BufferedReader in = new BufferedReader(new FileReader(tmpFile))) { - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - GetCSV.getCsv(fileSystem, in, outputFile, classForName, delimiter); - } finally { - FileUtils.deleteQuietly(new File(tmpFile)); - } - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java 
index 17a78760e..5fede3f59 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java @@ -9,7 +9,6 @@ import java.io.PrintWriter; import java.util.Arrays; import java.util.Objects; -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; @@ -24,6 +23,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; import eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj.DOAJEntry; public class ExtractAndMapDoajJson { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 84035fe4e..1a3261ffb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -74,7 +74,9 @@ ${wf:conf('resumeFrom') eq 'ProduceHBM'} - ${wf:conf('resumeFrom') eq 'download_csv'} + ${wf:conf('resumeFrom') eq 'DownloadBoth'} + ${wf:conf('resumeFrom') eq 'DownloadGold'} + ${wf:conf('resumeFrom') eq 'DownloadDoaj'} @@ -83,18 +85,9 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - + - + @@ -103,21 +96,43 @@ --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --tmpFile/tmp/unibi_gold_replaced.csv - --outputFile${workingDir}/unibi_gold.json + --outputFile/user/${wf:user()}/data/unibi_gold.json 
--classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel - + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${doajJsonFileURL} + ${dumpPath} + ${dumpFileName} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 + eu.dnetlib.dhp.oa.graph.hostedbymap.ExtractAndMapDoajJson --hdfsNameNode${nameNode} - --fileURL${doajFileURL} - --tmpFile/tmp/doaj_replaced.csv - --outputFile${workingDir}/doaj.json - --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel + --compressedFile${dumpPath}/${dumpFileName} + --workingPath${workingDir}/DOAJ/ + --outputPath/user/${wf:user()}/data/doaj.json @@ -125,6 +140,54 @@ + + + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + --hdfsNameNode${nameNode} + --fileURL${unibiFileURL} + --tmpFile/tmp/unibi_gold_replaced.csv + --outputFile/user/${wf:user()}/data/unibi_gold.json + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${doajJsonFileURL} + ${dumpPath} + ${dumpFileName} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.ExtractAndMapDoajJson + --hdfsNameNode${nameNode} + --compressedFile${dumpPath}/${dumpFileName} + --workingPath${workingDir}/DOAJ/ + --outputPath/user/${wf:user()}/data/doaj.json + + + + + yarn-cluster diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index bdf0861ae..9e3a451e8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -36,9 +36,7 
@@ object SparkProduceHostedByMap { openaire.journal_id, "", "", - isOpenAccess, - -1, - List[String]() + isOpenAccess ) case Constants.EISSN => HostedByItemType( @@ -47,9 +45,7 @@ object SparkProduceHostedByMap { "", openaire.journal_id, "", - isOpenAccess, - -1, - List[String]() + isOpenAccess ) case Constants.ISSNL => HostedByItemType( @@ -58,9 +54,7 @@ object SparkProduceHostedByMap { "", "", openaire.journal_id, - isOpenAccess, - -1, - List[String]() + isOpenAccess ) // catch the default with a variable so you can print it @@ -85,36 +79,34 @@ object SparkProduceHostedByMap { issn: String, eissn: String, issnl: String, - oa: Boolean, - oaDate: Int, - reviewProcess: List[String] + oa: Boolean ): HostedByItemType = { if (issn != null) { if (eissn != null) { if (issnl != null) { - HostedByItemType(id, officialname, issn, eissn, issnl, oa, oaDate, reviewProcess) + HostedByItemType(id, officialname, issn, eissn, issnl, oa) } else { - HostedByItemType(id, officialname, issn, eissn, "", oa, oaDate, reviewProcess) + HostedByItemType(id, officialname, issn, eissn, "", oa) } } else { if (issnl != null) { - HostedByItemType(id, officialname, issn, "", issnl, oa, oaDate, reviewProcess) + HostedByItemType(id, officialname, issn, "", issnl, oa) } else { - HostedByItemType(id, officialname, issn, "", "", oa, oaDate, reviewProcess) + HostedByItemType(id, officialname, issn, "", "", oa) } } } else { if (eissn != null) { if (issnl != null) { - HostedByItemType(id, officialname, "", eissn, issnl, oa, oaDate, reviewProcess) + HostedByItemType(id, officialname, "", eissn, issnl, oa) } else { - HostedByItemType(id, officialname, "", eissn, "", oa, oaDate, reviewProcess) + HostedByItemType(id, officialname, "", eissn, "", oa) } } else { if (issnl != null) { - HostedByItemType(id, officialname, "", "", issnl, oa, oaDate, reviewProcess) + HostedByItemType(id, officialname, "", "", issnl, oa) } else { - HostedByItemType("", "", "", "", "", oa, oaDate, reviewProcess) + 
HostedByItemType("", "", "", "", "", oa) } } } @@ -129,12 +121,10 @@ object SparkProduceHostedByMap { dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, - false, - -1, - List[String]() + false ) } - HostedByItemType("", "", "", "", "", false, -1, List[String]()) + HostedByItemType("", "", "", "", "", false) } def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = { @@ -160,9 +150,7 @@ object SparkProduceHostedByMap { gold.getIssn, "", gold.getIssnL, - true, - -1, - List[String]() + true ) } @@ -192,9 +180,7 @@ object SparkProduceHostedByMap { doaj.getIssn, doaj.getEissn, "", - true, - -1, - doaj.getReviewProcess.asScala.toList + true ) } return getHostedByItemType( @@ -203,9 +189,7 @@ object SparkProduceHostedByMap { doaj.getIssn, doaj.getEissn, "", - true, - doaj.getOaStart, - doaj.getReviewProcess.asScala.toList + true ) } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index edf74fc6a..48f1e0c06 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -55,7 +55,6 @@ public class DownloadCsvTest { new DownloadCSV() .doDownload( fileURL, - workingDir + "/unibi_gold", outputFile, UnibiGoldModel.class.getName(), ',', @@ -91,56 +90,6 @@ public class DownloadCsvTest { assertEquals(67028, count); } - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://doaj.org/csv"; - - final String outputFile = workingDir + "/doaj.json"; - new DownloadCSV() - .doDownload( - fileURL, - workingDir + "/doaj", - outputFile, - DOAJModel.class.getName(), - ',', - fs); - - BufferedReader in 
= new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - assertEquals("0001-3765", doaj.getIssn()); - assertEquals("1678-2690", doaj.getEissn()); - assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); - } - if (count == 22) { - log.info(new ObjectMapper().writeValueAsString(doaj)); - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - } - if (count == 7904) { - // log.info(new ObjectMapper().writeValueAsString(doaj)); - assertEquals("", doaj.getIssn()); - assertEquals("2055-7159", doaj.getEissn()); - assertEquals("BJR|case reports", doaj.getJournalTitle()); - } - if (count == 16707) { - - assertEquals("2783-1043", doaj.getIssn()); - assertEquals("2783-1051", doaj.getEissn()); - assertEquals("فیزیک کاربردی ایران", doaj.getJournalTitle()); - } - - count += 1; - } - - assertEquals(16715, count); - } - @AfterAll public static void cleanup() { FileUtils.deleteQuietly(new File(workingDir)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json index 9cec80eb4..09730f1da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json @@ -1,25 +1,25 @@ -{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":"Double blind peer review"} -{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":"Blind peer review"} -{"journalTitle":" Историко-биологические 
исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":"Double blind peer review"} -{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","reviewProcess":"Double blind peer review"} -{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","reviewProcess":"Double blind peer review"} -{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":"Double blind peer review"} -{"journalTitle":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","reviewProcess":"Peer review"} -{"journalTitle":"Membranes","issn":"2077-0375","eissn":"","reviewProcess":"Blind peer review"} -{"journalTitle":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","reviewProcess":"Blind peer review"} -{"journalTitle":"Agriculture","issn":"","eissn":"2077-0472","reviewProcess":"Blind peer review"} -{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","reviewProcess":"Double blind peer review"} -{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","reviewProcess":"Blind peer review"} -{"journalTitle":"Religions","issn":"","eissn":"2077-1444","reviewProcess":"Double blind peer review"} -{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":"Double blind peer review"} -{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":"","reviewProcess":"Peer review"} -{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":"Double blind peer review"} -{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Economics Finance and Administrative 
Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":"Double blind peer review"} -{"journalTitle":"Science Education International","issn":"","eissn":"2077-2327","reviewProcess":"Double blind peer review"} -{"journalTitle":"Edumecentro","issn":"","eissn":"2077-2874","reviewProcess":"Double blind peer review"} -{"journalTitle":"Monteverdia","issn":"","eissn":"2077-2890","reviewProcess":"Double blind peer review"} -{"journalTitle":"Transformación","issn":"","eissn":"2077-2955","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":"Double blind peer review"} -{"journalTitle":"Revue de Primatologie","issn":"","eissn":"2077-3757","reviewProcess":"Peer review"} \ No newline at end of file +{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":["Double blind peer review"],"oaStart":2015} +{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":["Blind peer review"],"oaStart":2009} +{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":null,"reviewProcess":["Double blind peer review"],"oaStart":2006} +{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":null,"eissn":"2076-8427","reviewProcess":["Double blind peer review"],"oaStart":2009} +{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":["Double blind peer review"],"oaStart":2008} +{"journalTitle":"Rambam Maimonides Medical Journal","issn":null,"eissn":"2076-9172","reviewProcess":["Peer review"],"oaStart":2010} +{"journalTitle":"Membranes","issn":"2077-0375","eissn":null,"reviewProcess":["Blind peer review"],"oaStart":2011} +{"journalTitle":"Journal of Clinical 
Medicine","issn":null,"eissn":"2077-0383","reviewProcess":["Blind peer review"],"oaStart":2012} +{"journalTitle":"Agriculture","issn":null,"eissn":"2077-0472","reviewProcess":["Blind peer review"],"oaStart":2011} +{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":null,"reviewProcess":["Double blind peer review"],"oaStart":2014} +{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":["Double blind peer review"],"oaStart":2019} +{"journalTitle":"Journal of Marine Science and Engineering","issn":null,"eissn":"2077-1312","reviewProcess":["Blind peer review"],"oaStart":2013} +{"journalTitle":"Religions","issn":null,"eissn":"2077-1444","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":null,"reviewProcess":["Peer review"],"oaStart":2009} +{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":["Double blind peer review"],"oaStart":2014} +{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":["Double blind peer review"],"oaStart":2017} +{"journalTitle":"Science Education International","issn":null,"eissn":"2077-2327","reviewProcess":["Double blind peer review"],"oaStart":2017} +{"journalTitle":"Edumecentro","issn":null,"eissn":"2077-2874","reviewProcess":["Double blind peer review"],"oaStart":2013} +{"journalTitle":"Monteverdia","issn":null,"eissn":"2077-2890","reviewProcess":["Double blind peer review"],"oaStart":2008} +{"journalTitle":"Transformación","issn":null,"eissn":"2077-2955","reviewProcess":["Double blind peer review"],"oaStart":2010} 
+{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":["Double blind peer review"],"oaStart":2011} +{"journalTitle":"Revue de Primatologie","issn":null,"eissn":"2077-3757","reviewProcess":["Peer review"],"oaStart":2009} \ No newline at end of file From f25407bbe24016355e782b1d5d6e7ccb6e23be97 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Mar 2022 09:32:42 +0100 Subject: [PATCH 109/162] added mapping for datasource consent fields to integrate them in the graph --- .../graph/raw/MigrateDbEntitiesApplication.java | 3 +++ .../dnetlib/dhp/oa/graph/sql/queryDatasources.sql | 3 +++ .../raw/MigrateDbEntitiesApplicationTest.java | 3 +++ .../oa/graph/raw/datasources_resultset_entry.json | 15 +++++++++++++++ pom.xml | 2 +- 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index e453f7918..1f8c23ac3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -309,6 +309,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setThematic(rs.getBoolean("thematic")); ds.setKnowledgegraph(rs.getBoolean("knowledgegraph")); ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); + ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse")); + ds.setFulltextdownload(rs.getBoolean("fulltextdownload")); + ds.setConsenttermsofusedate(rs.getDate("consenttermsofusedate").toString()); return Arrays.asList(ds); } catch (final Exception e) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index 98092e882..2623c65c4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -89,6 +89,9 @@ SELECT d.issn AS issnPrinted, d.eissn AS issnOnline, d.lissn AS issnLinking, + d.consenttermsofuse AS consenttermsofuse, + d.fulltextdownload AS fulltextdownload, + d.consenttermsofusedate AS consenttermsofusedate, de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, de.thematic AS thematic, de.knowledge_graph AS knowledgegraph, diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index b65bd9fd8..05ae5be74 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -100,6 +100,9 @@ public class MigrateDbEntitiesApplicationTest { assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid()); assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid()); + assertEquals(true, ds.getConsenttermsofuse()); + assertEquals(true, ds.getFulltextdownload()); + assertEquals("2022-03-11", ds.getConsenttermsofusedate()); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json index 42b140306..11b884cde 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json @@ -268,5 +268,20 @@ "value": [ "Journal article@@@eosc:contentpolicies" ] + }, + { + "field": "consenttermsofuse", + "type": "boolean", + "value": true + }, + { + "field": "fulltextdownload", + "type": "boolean", + "value": true + }, + { + "field": "consenttermsofusedate", + "type": "date", + "value": "2022-03-11" } ] diff --git a/pom.xml b/pom.xml index 603a4cf1e..7a20a1652 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.29] + [2.10.30-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 2b643059fa5627c42cc6d893d6a35afa8e4239cb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 11 Mar 2022 13:56:48 +0100 Subject: [PATCH 110/162] [Country Propagation] changed the logic to get the collectedfrom at the result level. To fix issue when no instance is created for a result that should have the country associated. Change the code to use spark instead of hive to prepare the data needed for the propagation step. 
Added new tests for the intermediate steps and new verification for the propagation itself --- .../dhp/countrypropagation/CountrySbs.java | 7 ++ .../countrypropagation/DatasourceCountry.java | 7 ++ .../countrypropagation/EntityEntityRel.java | 32 +++++ .../PrepareDatasourceCountryAssociation.java | 78 ++++++------ .../PrepareResultCountrySet.java | 100 +++++++++------ .../SparkCountryPropagationJob.java | 44 +++---- .../input_countrypropagation_parameters.json | 6 - .../input_prepareassoc_parameters.json | 6 - ...input_prepareresultcountry_parameters.json | 6 +- .../CountryPropagationJobTest.java | 73 ++++++++++- .../DatasourceCountryPreparationTest.java | 111 +++++++++++++++++ .../ResultCountryPreparationTest.java | 115 ++++++++++++++++++ .../datasourcecountry/datasourcecountry | 3 + .../graph/datasource/datasource.json | 11 ++ .../graph/organization/organization | 8 ++ .../graph/publication/publication | 12 ++ .../graph/relation/relation.json | 24 ++++ .../software/software_10.json.gz | Bin .../preparedInfo/publication/resultCountrySet | 5 + .../{ => software}/preparedInfo.json.gz | Bin .../oa/provision/XmlRecordFactoryTest.java | 6 +- 21 files changed, 532 insertions(+), 122 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/EntityEntityRel.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/datasourcecountry/datasourcecountry create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/datasource/datasource.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/organization/organization 
create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/publication/publication create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/relation/relation.json rename dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/{sample => graph}/software/software_10.json.gz (100%) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication/resultCountrySet rename dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/{ => software}/preparedInfo.json.gz (100%) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java index 271cc6bb3..5dbd912cb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java @@ -22,4 +22,11 @@ public class CountrySbs implements Serializable { public void setClassname(String classname) { this.classname = classname; } + + public static CountrySbs newInstance(String classid, String classname) { + CountrySbs csbs = new CountrySbs(); + csbs.classid = classid; + csbs.classname = classname; + return csbs; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java index 642192f73..4edc0f7a2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java @@ -22,4 +22,11 @@ public class DatasourceCountry implements Serializable { 
public void setCountry(CountrySbs country) { this.country = country; } + + public static DatasourceCountry newInstance(String dataSourceId, CountrySbs country) { + DatasourceCountry dsc = new DatasourceCountry(); + dsc.dataSourceId = dataSourceId; + dsc.country = country; + return dsc; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/EntityEntityRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/EntityEntityRel.java new file mode 100644 index 000000000..eece5a0b9 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/EntityEntityRel.java @@ -0,0 +1,32 @@ + +package eu.dnetlib.dhp.countrypropagation; + +import java.io.Serializable; + +public class EntityEntityRel implements Serializable { + private String entity1Id; + private String entity2Id; + + public static EntityEntityRel newInstance(String source, String target) { + EntityEntityRel dso = new EntityEntityRel(); + dso.entity1Id = source; + dso.entity2Id = target; + return dso; + } + + public String getEntity1Id() { + return entity1Id; + } + + public void setEntity1Id(String entity1Id) { + this.entity1Id = entity1Id; + } + + public String getEntity2Id() { + return entity2Id; + } + + public void setEntity2Id(String entity2Id) { + this.entity2Id = entity2Id; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index ddc7f93f7..c02f37015 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -2,14 +2,17 @@ package eu.dnetlib.dhp.countrypropagation; import static 
eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Arrays; import java.util.List; -import java.util.stream.Collectors; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -21,7 +24,9 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; /** * For the association of the country to the datasource The association is computed only for datasource of specific type @@ -54,9 +59,8 @@ public class PrepareDatasourceCountryAssociation { log.info("outputPath {}: ", outputPath); SparkConf conf = new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - runWithSparkHiveSession( + runWithSparkSession( conf, isSparkSessionManaged, spark -> { @@ -77,40 +81,46 @@ public class PrepareDatasourceCountryAssociation { String inputPath, String outputPath) { - final String whitelisted = whitelist - .stream() - .map(id -> " d.id = '" + id + "'") - .collect(Collectors.joining(" OR ")); + // filtering of the datasource taking only the non deleted by inference and those with the allowed types or + // whose id is in whitelist + Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class) + .filter( + (FilterFunction) ds -> !ds.getDataInfo().getDeletedbyinference() 
&& + (allowedtypes.contains(ds.getDatasourcetype().getClassid()) || + whitelist.contains(ds.getId()))); - final String allowed = allowedtypes - .stream() - .map(type -> " d.datasourcetype.classid = '" + type + "'") - .collect(Collectors.joining(" OR ")); + // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass + Dataset relation = readPath(spark, inputPath + "/relation", Relation.class) + .filter( + (FilterFunction) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) && + !rel.getDataInfo().getDeletedbyinference()); - Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); - Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); - Dataset organization = readPath(spark, inputPath + "/organization", Organization.class); + // filtering of the organization taking only the non deleted by inference and those with information about the + // country + Dataset organization = readPath(spark, inputPath + "/organization", Organization.class) + .filter( + (FilterFunction) o -> !o.getDataInfo().getDeletedbyinference() && + o.getCountry().getClassid().length() > 0 && + !o.getCountry().getClassid().equals(ModelConstants.UNKNOWN)) ; - datasource.createOrReplaceTempView("datasource"); - relation.createOrReplaceTempView("relation"); - organization.createOrReplaceTempView("organization"); + // associated the datasource id with the id of the organization providing the datasource + Dataset dse = datasource + .joinWith(relation, datasource.col("id").equalTo(relation.col("source"))) + .map( + (MapFunction, EntityEntityRel>) t2 -> EntityEntityRel + .newInstance(t2._2.getSource(), t2._2.getTarget()), + Encoders.bean(EntityEntityRel.class)); - String query = "SELECT source dataSourceId, " + - "named_struct('classid', country.classid, 'classname', country.classname) country " + - "FROM datasource d " + - "JOIN relation rel " + - "ON d.id = rel.source " + - "JOIN 
organization o " + - "ON o.id = rel.target " + - "WHERE rel.datainfo.deletedbyinference = false " + - "and lower(rel.relclass) = '" + ModelConstants.IS_PROVIDED_BY.toLowerCase() + "'" + - "and o.datainfo.deletedbyinference = false " + - "and length(o.country.classid) > 0 " + - "and (" + allowed + " or " + whitelisted + ")"; - - spark - .sql(query) - .as(Encoders.bean(DatasourceCountry.class)) + // joins with the information stored in the organization dataset to associate the country to the datasource id + dse + .joinWith(organization, dse.col("entity2Id").equalTo(organization.col("id"))) + .map((MapFunction, DatasourceCountry>) t2 -> { + Qualifier country = t2._2.getCountry(); + return DatasourceCountry + .newInstance( + t2._1.getEntity1Id(), + CountrySbs.newInstance(country.getClassid(), country.getClassname())); + }, Encoders.bean(DatasourceCountry.class)) .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 77f7288f6..8ca87fa21 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -3,14 +3,21 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; +import 
org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; import org.slf4j.Logger; @@ -23,14 +30,6 @@ import scala.Tuple2; public class PrepareResultCountrySet { private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class); - private static final String RESULT_COUNTRYSET_QUERY = "SELECT id resultId, collect_set(country) countrySet " - + "FROM ( SELECT id, country " - + "FROM datasource_country JOIN cfhb ON cf = dataSourceId " - + "UNION ALL " - + "SELECT id, country FROM datasource_country " - + "JOIN cfhb ON hb = dataSourceId ) tmp " - + "GROUP BY id"; - public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( @@ -45,6 +44,8 @@ public class PrepareResultCountrySet { Boolean isSparkSessionManaged = isSparkSessionManaged(parser); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + String workingPath = parser.get("workingPath"); + String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); @@ -60,9 +61,9 @@ public class PrepareResultCountrySet { Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); + //conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - runWithSparkHiveSession( + runWithSparkSession( conf, isSparkSessionManaged, spark -> { @@ -72,6 +73,7 @@ public class PrepareResultCountrySet { inputPath, outputPath, datasourcecountrypath, + workingPath, resultClazz); }); } @@ -81,43 +83,63 @@ public class PrepareResultCountrySet { String inputPath, String outputPath, String datasourcecountrypath, + String workingPath, Class resultClazz) { - Dataset result = readPath(spark, inputPath, resultClazz); 
- result.createOrReplaceTempView("result"); + // selects all the results non deleted by inference and non invisible + Dataset result = readPath(spark, inputPath, resultClazz) + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + !r.getDataInfo().getInvisible()); - createCfHbforResult(spark); + // of the results collects the distinct keys for collected from (at the level of the result) and hosted by + // and produces pairs resultId, key for each distinct key associated to the result + result.flatMap((FlatMapFunction) r -> { + Set cfhb = r.getCollectedfrom().stream().map(cf -> cf.getKey()).collect(Collectors.toSet()); + cfhb.addAll(r.getInstance().stream().map(i -> i.getHostedby().getKey()).collect(Collectors.toSet())); + return cfhb + .stream() + .map(value -> EntityEntityRel.newInstance(r.getId(), value)) + .collect(Collectors.toList()) + .iterator(); + }, Encoders.bean(EntityEntityRel.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath + "/resultCfHb"); Dataset datasource_country = readPath(spark, datasourcecountrypath, DatasourceCountry.class); - datasource_country.createOrReplaceTempView("datasource_country"); - - spark - .sql(RESULT_COUNTRYSET_QUERY) - .as(Encoders.bean(ResultCountrySet.class)) - .toJavaRDD() - .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) - .reduceByKey((a, b) -> { - ArrayList countryList = a.getCountrySet(); - Set countryCodes = countryList - .stream() - .map(CountrySbs::getClassid) - .collect(Collectors.toSet()); - b - .getCountrySet() - .stream() - .forEach(c -> { - if (!countryCodes.contains(c.getClassid())) { - countryList.add(c); - countryCodes.add(c.getClassid()); - } + Dataset cfhb = readPath(spark, workingPath + "/resultCfHb", EntityEntityRel.class); + datasource_country + .joinWith( + cfhb, cfhb + .col("entity2Id") + .equalTo(datasource_country.col("datasourceId"))) + .groupByKey( + (MapFunction, String>) t2 -> t2._2().getEntity1Id(), + 
Encoders.STRING()) + .mapGroups( + (MapGroupsFunction, ResultCountrySet>) (k, it) -> { + ResultCountrySet rcs = new ResultCountrySet(); + rcs.setResultId(k); + Set set = new HashSet<>(); + Set countryCodes = new HashSet<>(); + DatasourceCountry first = it.next()._1(); + countryCodes.add(first.getCountry().getClassid()); + set.add(first.getCountry()); + it.forEachRemaining(t2 -> { + if (!countryCodes.contains(t2._1().getCountry().getClassid())) + set.add(t2._1().getCountry()); }); - a.setCountrySet(countryList); - return a; - }) - .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) - .saveAsTextFile(outputPath, GzipCodec.class); + rcs.setCountrySet(new ArrayList<>(set)); + return rcs; + }, Encoders.bean(ResultCountrySet.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 4aa48583f..1bbfbdafd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -56,12 +56,6 @@ public class SparkCountryPropagationJob { final String resultClassName = parser.get("resultTableName"); log.info("resultTableName: {}", resultClassName); - final Boolean saveGraph = Optional - .ofNullable(parser.get("saveGraph")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("saveGraph: {}", saveGraph); - Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -75,8 +69,7 @@ public class SparkCountryPropagationJob { sourcePath, preparedInfoPath, outputPath, - resultClazz, - saveGraph); + resultClazz); }); } @@ -85,27 +78,26 @@ public class SparkCountryPropagationJob { String 
sourcePath, String preparedInfoPath, String outputPath, - Class resultClazz, - boolean saveGraph) { + Class resultClazz) { - if (saveGraph) { - log.info("Reading Graph table from: {}", sourcePath); - Dataset res = readPath(spark, sourcePath, resultClazz); - log.info("Reading prepared info: {}", preparedInfoPath); - Dataset prepared = spark - .read() - .json(preparedInfoPath) - .as(Encoders.bean(ResultCountrySet.class)); + log.info("Reading Graph table from: {}", sourcePath); + Dataset res = readPath(spark, sourcePath, resultClazz); + + log.info("Reading prepared info: {}", preparedInfoPath); + Dataset prepared = spark + .read() + .json(preparedInfoPath) + .as(Encoders.bean(ResultCountrySet.class)); + + res + .joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer") + .map(getCountryMergeFn(), Encoders.bean(resultClazz)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath); - res - .joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer") - .map(getCountryMergeFn(), Encoders.bean(resultClazz)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath); - } } private static MapFunction, R> getCountryMergeFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json index 984b40774..e7a6bf5b0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json @@ -11,12 +11,6 @@ "paramDescription": "the hive metastore uris", "paramRequired": false }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of 
the graph must be saved", - "paramRequired": false - }, { "paramName":"tn", "paramLongName":"resultTableName", diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json index 95d4c1c60..a00105f2b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json @@ -5,12 +5,6 @@ "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, { "paramName": "out", "paramLongName": "outputPath", diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json index 5efa3dbd6..18163d1f9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json @@ -12,9 +12,9 @@ "paramRequired": true }, { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", + "paramName":"w", + "paramLongName":"workingPath", + "paramDescription": "the working path", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java 
b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index 963ee5529..e0688b430 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -7,6 +7,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import eu.dnetlib.dhp.schema.oaf.Publication; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -69,17 +70,16 @@ public class CountryPropagationJobTest { @Test void testCountryPropagationSoftware() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software") + .getResource("/eu/dnetlib/dhp/countrypropagation/graph/software") .getPath(); final String preparedInfoPath = getClass() - .getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo") + .getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo/software") .getPath(); SparkCountryPropagationJob .main( new String[] { "--isSparkSessionManaged", Boolean.FALSE.toString(), "--sourcePath", sourcePath, - "-saveGraph", "true", "-resultTableName", Software.class.getCanonicalName(), "-outputPath", workingDir.toString() + "/software", "-preparedInfoPath", preparedInfoPath @@ -91,8 +91,6 @@ public class CountryPropagationJobTest { .textFile(workingDir.toString() + "/software") .map(item -> OBJECT_MAPPER.readValue(item, Software.class)); - // tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s)); - Assertions.assertEquals(10, tmp.count()); Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Software.class)); @@ -259,4 +257,69 @@ public class CountryPropagationJobTest { 7, countryExplodedWithCountryProvenance.filter("_2 = 'propagation'").count()); } + @Test + void 
testCountryPropagationPublication() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication") + .getPath(); + final String preparedInfoPath = getClass() + .getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication") + .getPath(); + SparkCountryPropagationJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + "-resultTableName", Publication.class.getCanonicalName(), + "-outputPath", workingDir.toString() + "/publication", + "-preparedInfoPath", preparedInfoPath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(12, tmp.count()); + + Assertions.assertEquals(5, tmp.filter(r -> r.getCountry().size() > 0).count()); + + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:countries", c.getSchemeid()))); + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:countries", c.getSchemename()))); + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertFalse(c.getDataInfo().getDeletedbyinference()))); + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertFalse(c.getDataInfo().getInvisible()))); + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertTrue(c.getDataInfo().getInferred()))); + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("0.85", c.getDataInfo().getTrust()))); + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("propagation", c.getDataInfo().getInferenceprovenance()))); + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("country:instrepos", c.getDataInfo().getProvenanceaction().getClassid()))); + tmp.foreach(r -> 
r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:provenanceActions", c.getDataInfo().getProvenanceaction().getSchemeid()))); + tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:provenanceActions", c.getDataInfo().getProvenanceaction().getSchemename()))); + + List countries = tmp.filter(r -> r.getId().equals("50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072")).collect().get(0).getCountry(); + Assertions.assertEquals(1, countries.size()); + Assertions.assertEquals("NL",countries.get(0).getClassid()); + Assertions.assertEquals("Netherlands",countries.get(0).getClassname()); + + countries = tmp.filter(r -> r.getId().equals("50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b")).collect().get(0).getCountry(); + Assertions.assertEquals(1, countries.size()); + Assertions.assertEquals("NL",countries.get(0).getClassid()); + Assertions.assertEquals("Netherlands",countries.get(0).getClassname()); + + countries = tmp.filter(r -> r.getId().equals("50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6")).collect().get(0).getCountry(); + Assertions.assertEquals(2, countries.size()); + Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); + + countries = tmp.filter(r -> r.getId().equals("50|355e65625b88::74009c567c81b4aa55c813db658734df")).collect().get(0).getCountry(); + Assertions.assertEquals(2, countries.size()); + Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands"))); + + countries = tmp.filter(r -> r.getId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088")).collect().get(0).getCountry(); + Assertions.assertEquals(2, 
countries.size()); + Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); + } + } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java new file mode 100644 index 000000000..8560a9d66 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.countrypropagation; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; + +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + + +public class DatasourceCountryPreparationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(DatasourceCountryPreparationTest.class.getSimpleName()); + + SparkConf conf = new SparkConf(); + conf.setAppName(DatasourceCountryPreparationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", 
workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(DatasourceCountryPreparationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testPrepareDatasourceCountry() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/countrypropagation/graph") + .getPath(); + + PrepareDatasourceCountryAssociation + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + "--outputPath", workingDir.toString() + "/datasourceCountry", + "--allowedtypes","pubsrepository::institutional" , + "--whitelist","10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/datasourceCountry") + .map(item -> 
OBJECT_MAPPER.readValue(item, DatasourceCountry.class)); + + Assertions.assertEquals(3, tmp.count()); + Assertions.assertEquals(1, tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")).count()); + Assertions.assertEquals(1, tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")).count()); + Assertions.assertEquals(1, tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")).count()); + + Assertions.assertEquals("NL", tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")).collect() + .get(0).getCountry().getClassid()); + Assertions.assertEquals("Netherlands", tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")).collect() + .get(0).getCountry().getClassname()); + + Assertions.assertEquals("IT", tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")).collect() + .get(0).getCountry().getClassid()); + Assertions.assertEquals("Italy", tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")).collect() + .get(0).getCountry().getClassname()); + + Assertions.assertEquals("FR", tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")).collect() + .get(0).getCountry().getClassid()); + Assertions.assertEquals("France", tmp.filter(dsc -> dsc.getDataSourceId() + .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")).collect() + .get(0).getCountry().getClassname()); + + tmp.foreach(e -> System.out.println(OBJECT_MAPPER.writeValueAsString(e))); + + } +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java new file 
mode 100644 index 000000000..37cc4f342 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java @@ -0,0 +1,115 @@ +package eu.dnetlib.dhp.countrypropagation; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Publication; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; + +public class ResultCountryPreparationTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(ResultCountryPreparationTest.class.getSimpleName()); + + SparkConf conf = new SparkConf(); + conf.setAppName(ResultCountryPreparationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(ResultCountryPreparationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testPrepareResultCountry() throws Exception { + final 
String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication") + .getPath(); + + final String preparedInfoPath = getClass() + .getResource("/eu/dnetlib/dhp/countrypropagation/datasourcecountry") + .getPath(); + + + PrepareResultCountrySet + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--workingPath", workingDir.toString() + "/working", + "--sourcePath", sourcePath, + "--outputPath", workingDir.toString() + "/resultCountry", + "--preparedInfoPath", preparedInfoPath, + "--resultTableName", Publication.class.getCanonicalName() + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/resultCountry") + .map(item -> OBJECT_MAPPER.readValue(item, ResultCountrySet.class)); + + + Assertions.assertEquals(5, tmp.count()); + + ResultCountrySet rc = tmp.filter(r -> r.getResultId().equals("50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072")).collect().get(0); + Assertions.assertEquals(1, rc.getCountrySet().size()); + Assertions.assertEquals("NL",rc.getCountrySet().get(0).getClassid()); + Assertions.assertEquals("Netherlands",rc.getCountrySet().get(0).getClassname()); + + rc = tmp.filter(r -> r.getResultId().equals("50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b")).collect().get(0); + Assertions.assertEquals(1, rc.getCountrySet().size()); + Assertions.assertEquals("NL",rc.getCountrySet().get(0).getClassid()); + Assertions.assertEquals("Netherlands",rc.getCountrySet().get(0).getClassname()); + + rc = tmp.filter(r -> r.getResultId().equals("50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6")).collect().get(0); + Assertions.assertEquals(2, rc.getCountrySet().size()); + Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("FR") && 
cs.getClassname().equals("France"))); + + rc = tmp.filter(r -> r.getResultId().equals("50|355e65625b88::74009c567c81b4aa55c813db658734df")).collect().get(0); + Assertions.assertEquals(2, rc.getCountrySet().size()); + Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands"))); + + + rc = tmp.filter(r -> r.getResultId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088")).collect().get(0); + Assertions.assertEquals(2, rc.getCountrySet().size()); + Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); + + + } +} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/datasourcecountry/datasourcecountry b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/datasourcecountry/datasourcecountry new file mode 100644 index 000000000..ac31abfbd --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/datasourcecountry/datasourcecountry @@ -0,0 +1,3 @@ +{"dataSourceId":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","country":{"classid":"NL","classname":"Netherlands"}} +{"dataSourceId":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","country":{"classid":"IT","classname":"Italy"}} +{"dataSourceId":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","country":{"classid":"FR","classname":"France"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/datasource/datasource.json 
b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/datasource/datasource.json new file mode 100644 index 000000000..6b71024fd --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/datasource/datasource.json @@ -0,0 +1,11 @@ +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos 
Portugueses"},"extraInfo":[],"id":"10|issn___print::a7a2010e75d849442790955162ef4e42","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos 
Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"crissystem","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal 
archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"NARCIS"},"extraInfo":[],"id":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions"
,"schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal 
articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::institutional","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"BELARUS"},"extraInfo":[],"id":"10|opendoar____::fd272fe04b7d4e68effd01bddcc6bb34","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos 
Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::institutional","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal 
archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"UNIGE"},"extraInfo":[],"id":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions",
"schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal 
articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"crissystem","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"TUHH"},"extraInfo":[],"id":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos 
Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal 
archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"extraInfo":[],"id":"10|issn___print::a7a2010e75d849442790955162ef4e43","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:prove
nanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal 
articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"extraInfo":[],"id":"10|issn___print::a7a2010e75d849442790955162ef4e44","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos 
Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal 
archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"extraInfo":[],"id":"10|issn___print::a7a2010e75d849442790955162ef4e45","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:prove
nanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal 
articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"extraInfo":[],"id":"10|issn___print::a7a2010e75d849442790955162ef4e46","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos 
Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal 
archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"extraInfo":[],"id":"10|issn___print::a7a2010e75d849442790955162ef4e47","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:prove
nanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"contentpolicies":[{"classid":"Journal articles","classname":"Journal 
articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"extraInfo":[],"id":"10|issn___print::a7a2010e75d849442790955162ef4e48","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"2447-0546","name":"Estudos 
Portugueses"},"knowledgegraph":false,"lastupdatetimestamp":1646230297782,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24470546"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Estudos Portugueses"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2447-0546"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"thematic":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/organization/organization b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/organization/organization new file mode 100644 index 000000000..653252243 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/organization/organization @@ -0,0 +1,8 @@ +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of 
Finland"}],"country":{"classid":"FI","classname":"Finland","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:pro
venanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"extraInfo":[],"id":"20|aka_________::cffd8c5427c035e5d4bddc5647942ba8","lastupdatetimestamp":1646230297782,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dne
t:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Graduate Institute of International Studies"},"originalId":["aka_________::4a12fb514672d706d7e9d4605ad45d78"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-02-15","dateoftransformation":"2021-05-19","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"
Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"
},"value":"false"},"extraInfo":[],"id":"20|anr_________::357ee61b6fe46c7c07210a1cd9acf6ed","lastupdatetimestamp":1646230297782,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"INSTITUT NATIONAL DE LA SANTE ET DE LA RECHERCHE MEDICALE - DELEGATION PARIS XI"},"originalId":["anr_________::145402d7c38cf25af807084e757e1161"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-02-15","dateoftransformation":"2021-05-19","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:
crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanc
eActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|anr_________::43084487236103c68872cf929c57eaff","lastupdatetimestamp":1646230297782,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"Laboratoire dInformatique Paris Descartes"},"originalId":["anr_________::a8cb45ed89911406d924a2c5831c1c5b"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"qualifier":{"classid":"RNSR","classname":"RNSR","schemeid":"","schemename":""},"value":"RNSR:200014469G"}]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la 
Recherche"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-05-20","dateoftransformation":"2021-05-19","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"d
net:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|anr_________::b22add4abf57294cb68882dab6062788","lastupdatetimestamp":1646230297782,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemei
d":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"Association de gestion de l'Ecole Centrale Electronique"},"originalId":["anr_________::50ced32bb0d6464fb3c0e3bff9347484"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"country":{"classid":"NL","classname":"Netherlands","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classnam
e":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.
900"},"value":"false"},"extraInfo":[],"id":"20|pending_org_::82f63b2d21ae88596b9d8991780e9888","lastupdatetimestamp":1646230297782,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Graduate Institute of International Studies"},"originalId":["aka_________::4a12fb514672d706d7e9d4605ad45d78"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-02-15","dateoftransformation":"2021-05-19","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","class
name":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:pro
venanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|pending_org_::5b73b8b2d0df764e13a62291dfedf8f6","lastupdatetimestamp":1646230297782,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"INSTITUT NATIONAL DE LA SANTE ET DE LA RECHERCHE MEDICALE - DELEGATION PARIS XI"},"originalId":["anr_________::145402d7c38cf25af807084e757e1161"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la 
Recherche"}],"country":{"classid":"IT","classname":"Italy","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-02-15","dateoftransformation":"2021-05-19","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:pro
venanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|openorgs____::322ff2a6524820640bc5d1311871585e","lastupdatetimestamp":1646230297782,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dne
t:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"Laboratoire dInformatique Paris Descartes"},"originalId":["anr_________::a8cb45ed89911406d924a2c5831c1c5b"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"qualifier":{"classid":"RNSR","classname":"RNSR","schemeid":"","schemename":""},"value":"RNSR:200014469G"}]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"country":{"classid":"FR","classname":"France","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-02-15","dateoftransformation":"2021-05-19","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:
crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanc
eActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|openorgs____::58e60f1715d219aa6757ba0b0f2ccbce","lastupdatetimestamp":1646230297782,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"Laboratoire dInformatique Paris Descartes"},"originalId":["anr_________::a8cb45ed89911406d924a2c5831c1c5b"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"qualifier":{"classid":"RNSR","classname":"RNSR","schemeid":"","schemename":""},"value":"RNSR:200014469G"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/publication/publication b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/publication/publication new file mode 100644 index 000000000..2d351b4b5 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/publication/publication @@ -0,0 +1,12 @@ +{"author":[{"fullname":"Tanouayi, Gnon","name":"Gnon","pid":[],"rank":1,"surname":"Tanouayi"},{"fullname":"GNANDI, Kissao","name":"Kissao","pid":[],"rank":2,"surname":"Gnandi"},{"fullname":"Ouro-Sama, 
Kamilou","name":"Kamilou","pid":[],"rank":3,"surname":"Ouro-Sama"},{"fullname":"Ahoudi, Housséni","name":"Housséni","pid":[],"rank":4,"surname":"Ahoudi"},{"fullname":"Solitoke, Hodabalo Dhéoulaba","name":"Hodabalo Dhéoulaba","pid":[],"rank":5,"surname":"Solitoke"},{"fullname":"Badassan, Tchaa Esso-Essinam","name":"Tchaa Esso-Essinam","pid":[],"rank":6,"surname":"Badassan"},{"fullname":"Nyametso, A. Yawovi","name":"A. Yawovi","pid":[],"rank":7,"surname":"Nyametso"},{"fullname":"Agbéko, Aduayi-Akué Adoté","name":"Aduayi-Akué Adoté","pid":[],"rank":8,"surname":"Agbéko"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"Episciences"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Importer of dst articles previously hosted by inist Eid system 
account"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"dateofcollection":"2022-03-01T00:17:08.896Z","dateoftransformation":"2022-03-01T02:57:50.566Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This study is a contribution to the development of adsorption techniques for the removal of fluoride in natural waters. The work is carried out on a laboratory scale using local geo-materials sorbents, on the one hand the residues from the treatment of natural phosphorite of Hahotoé-Kpogamé and on the other hand the attapulgite clay mineral from the costal basin of Togo. The work carried out concerns the adsorption of fluoride on those sorbents. The following parameters are batch tested on synthetic fluoride solutions: time, solution pH, geo-material dose and fluoride concentration. The fluoride is analyzed by absorption spectrometry. The adsorption yields on the phosphorite treatment residues for aqueous fluoride solutions at an initial concentration of 10 mg/L and an adsorbent concentration of 10 g/L are 49 % at pH 6.5 and 66 % at pH 4.0. In the same experimental conditions, the yields on clay minerals are 28.2 % and 36.3 %. 
These yields are logically improved by increasing the adsorbent dosage (from 2 to 30 g/L). Additional tests are carried out on natural water at an initial fluoride concentration of 3.76 mg/L."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Cette étude est une contribution au développement de techniques d’adsorption pour l’élimination du fluor dans les eaux naturelles. Les travaux ont été réalisés à l’échelle du laboratoire en utilisant comme sorbants des géo-matériaux locaux, d’une part les résidus du traitement des phosphates naturels de Hahotoé-Kpogamé et d’autre part l’argilite feuilletée du bassin sédimentaire côtier du Togo. Les travaux réalisés concernent l’adsorption du fluor sur les sorbants considérés. Les paramètres suivants ont été testés en batch sur des solutions synthétiques de fluor : le temps, le pH de la solution, la dose du géo-matériau et la concentration du fluor. Le fluor a été dosé par spectromètrie d’absorption. Les rendements d’adsorption sur les résidus de traitement des phosphates pour des solutions aqueuses de fluor à concentration initiale de 10 mg/L et une concentration en adsorbant de 10 g/L ont été de 49 % à pH 6,5 et 66 % à pH de 4,0. Dans les mêmes conditions expérimentales, les rendements sur les argilites ont été de 28,2 % et 36,3 %. Ces rendements ont logiquement été améliorés en augmentant le dosage en adsorbant (de 2 à 30 g/L). 
Des essais complémentaires ont été réalisés sur une eau naturelle à une concentration initiale en fluor de 3,76 mg/L."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.4267/dechets-sciences-techniques.3534"}],"collectedfrom":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://eid.episciences.org/7781"]}],"language":{"classid":"fra/fre","classname":"French","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646506202085,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Foai.episciences.org%2F","datestamp":"2017-05-01","harvestDate":"2022-03-
01T00:17:08.896Z","identifier":"oai:episciences.org:eid:7781","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:episciences.org:eid:7781","50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"ISSN: 2778-844X"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Environnement, Ingénierie & Développement"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Episciences.org"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"eid:7781 - Environnement, Ingénierie & Développement, 2017-05-01, N°73 - mai 
2017"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"clay"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"geo-materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphorite of Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"removal of 
fluoride"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"argilite"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"élimination du fluor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"géo-matériaux"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphate de 
Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"[SDE.IE]Environmental Sciences/Environmental Engineering"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Défluoruration des eaux à l’aide des résidus du traitement des phosphates naturels et des argilites feuilletées"}]} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global 
Journals"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"dateofcollection":"2020-06-01T07:11:47.22Z","dateoftransformation":"2020-07-25T07:25:11.051Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The results of treatment of 21 patients with multiple injuries, including 12 (57.2%) patients with lesions of limb bones, 9 (42.8%) patients with injuries of the pelvis treated at the Department of Traumatology number 2, 5 for the period of 2013 to 2014 were analyzed by the authors. 
Developed gentle immobilization of the lower limbs in patients with multiple injuries provides adequate fixation and extension of the lower limb in the intensive care period and during emergency external fixation by the device of external fixation device shin bone or hip.The introduction of surgical treatment of patients in the acute period of polytrauma, using minimally invasive fracture fixation technology allowed providing the early stabilization of the victim’s condition, to avoid diagnostic errors and obtain positive results of treatment in 98% of cases."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"distributionlocation":"","hostedby":{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"World 
Science"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://rsglobal.pl/index.php/ws/article/view/895"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2413-1032","issnPrinted":"2414-6404","name":"World Science","sp":"","vol":""},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646506135978,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frsglobal.pl%2Findex.php%2Findex%2Foai","datestamp":"2020-05-23T18:09:46Z","harvestDate":"2020-06-01T07:11:47.22Z","identifier":"oai:ojs2.rsglobal.pl:article/895","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:ojs2.rsglobal.pl:article/895","50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"RS Global Sp. 
z O.O."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Vol 3 No 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Том 3 № 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2414-6404"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2413-1032"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemen
ame":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external 
fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple 
trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible 
osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"}]} +{"author":[{"fullname":"Kemppainen, Mika","name":"Mika","pid":[],"rank":1,"surname":"Kemppainen"},{"fullname":"Virkkunen, Iikka","name":"Iikka","pid":[],"rank":2,"surname":"Virkkunen"},{"fullname":"Pitkänen, Jorma","name":"Jorma","pid":[],"rank":3,"surname":"Pitkänen"},{"fullname":"Paussu, Raimo","name":"Raimo","pid":[],"rank":4,"surname":"Paussu"},{"fullname":"Hänninen, Hannu","name":"Hannu","pid":[],"rank":5,"surname":"Hänninen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:34:39.606Z","dateoftransformation":"2022-02-28T14:02:46.142Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::210c52944502777ba567442480e6a76e","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505945226,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-23T08:45:14Z","harvestDate":"2022-02-28T12:34:39.606Z","identifier":"oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::210c52944502777ba567442480e6a76e","oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kemppainen , M , Virkkunen , I , Pitkänen , J , Paussu , R & Hänninen , H 2003 , ' Comparison of realistic artificial cracks and in-service cracks ' , The e-Journal of Nondestructive Testing & Ultrasonics , vol. 8 , no. 3 , 6 . 
< http://www.ndt.net/article/ecndt02/401/401.htm >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Comparison of realistic artificial cracks and in-service cracks"}]} +{"author":[{"fullname":"Kelhä, Väinö","name":"Väinö","pid":[],"rank":1,"surname":"Kelhä"},{"fullname":"Manninen, M.","name":"M.","pid":[],"rank":2,"surname":"Manninen"},{"fullname":"Oittinen, P.","name":"P.","pid":[],"rank":3,"surname":"Oittinen"},{"fullname":"Tiesmäki, Jarkko","name":"Jarkko","pid":[],"rank":4,"surname":"Tiesmäki"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"dateofcollection":"2022-02-28T12:32:28.547Z","dateoftransformation":"2022-02-28T14:46:21.4Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3754cff043a1700077031ea29f8cc240","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505968213,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-08-31T11:55:56Z","harvestDate":"2022-02-28T12:32:28.547Z","identifier":"oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::3754cff043a1700077031ea29f8cc240","oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kelhä , V , Manninen , M , Oittinen , P & Tiesmäki , J 1974 , ' A parallel plate tackmeter for measuring the splitting resistance of printing inks ' , Surface Coatings International: JOCCA , vol. 57 , pp. 
184-188 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A parallel plate tackmeter for measuring the splitting resistance of printing inks"}]} +{"author":[{"fullname":"Mononen, Petri","name":"Petri","pid":[],"rank":1,"surname":"Mononen"},{"fullname":"Innamaa, Satu","name":"Satu","pid":[],"rank":2,"surname":"Innamaa"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2022-02-28T12:38:14.128Z","dateoftransformation":"2022-02-28T14:48:17.052Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3875365f5052758953b072682e62bc80","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"altern
ateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8602dae4-00e8-4f45-828b-65a367eb4730"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505969378,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-09-17T12:01:51Z","harvestDate":"2022-02-28T12:38:14.128Z","identifier":"oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","50|355e65625b88::3875365f5052758953b072682e62bc80"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:cr
osswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mononen , P & Innamaa , S 2013 , ' Enhancing journey quality : Field Operational Test of Aftermarket and Nomadic Devices in Vehicles ' , Baltic Transport Journal , pp. 46-47 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Enhancing journey quality:Field Operational Test of Aftermarket and Nomadic Devices in Vehicles"}]} +{"author":[{"fullname":"Tsupari, Eemeli","name":"Eemeli","pid":[],"rank":1,"surname":"Tsupari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","value":"VTT Research Information System"},{"key":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"dateofcollection":"2022-02-28T12:38:39.075Z","dateoftransformation":"2022-02-28T15:37:43.154Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Despite international agreements, global greenhouse gas (GHG) emissions have not decreased according to the targets. Consequently, our generation is creating an enormous problem for future generations. As climate change is a global problem, GHG emissions must decrease globally. Consequently, international policies are needed, actions should be effective and the impacts should be assessed with broad boundaries. In Europe, the cornerstone of climate policy is the EU Emissions Trading Scheme (EU ETS) but the rebound impacts within the EU ETS are often excluded in the assessments. This dissertation examines the impacts of major CO2 emission reduction solutions with different system boundaries, highlighting the importance of boundary selection on the results. 
In addition, the economic feasibilities of the selected solutions are evaluated.The case examples represent the most important sectors in terms of global CO2 emissions, such as electricity and heat production, the steel industry and transport. The studied technologies include efficient Waste-to-Energy (WtE) concepts with high power-to-heat ratio, utilisation of CO2 Capture and Storage (CCS) in different applications, replacing steel mill blast furnaces with Oxygen Blast Furnaces (OBF), Combined Heat and Power (CHP) and Carbon Capture and Utilisation (CCU) for storable fuels, which can be used for example in transportation. The results highlight the importance of the consequences in the electricity production system as well as the rebound impacts in the EU ETS. For example, the studied concepts to decrease direct GHG emissions of steel mills lead to increased power purchase from markets and consequently increase in emissions of the power system. The impacts of CCU concepts based on electrolysis increase the emissions in electricity production but enable a decrease in the usage of fossil fuels in transportation. In addition, converting electricity to storable fuels enable higher shares of variable solar and wind energy in the power systems. The consequences in the power systems are complex, including for example the impacts on electricity imports and exports, future investments and the EU ETS. Even if these impacts can be recognised by qualitative means, unambiguous quantitative consequences cannot be given. Understanding the decisive impacts of the framework and boundaries is crucial to interpreting different assessments and making effective actions and policy decisions. 
Solutions which decrease emissions within a narrow system boundary can actually increase the emissions of the broader system."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0006","classname":"Doctoral 
thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d62ac5ef-7347-400f-95b2-59d970ceb505"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505683219,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-05-18T10:33:00Z","harvestDate":"2022-02-28T12:38:39.075Z","identifier":"oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto University"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tsupari , E 2018 , ' Impact of system boundaries on the effectiveness of climate change mitigation actions : Dissertation ' , Doctor Degree , Aalto University . 
< http://urn.fi/URN:ISBN:978-952-60-8358-2 >"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"energy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"environmental science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"climate change mitigation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"greenhouse 
gases"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon dioxide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"emissions trading"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"economic 
feasibility"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/dk/atira/pure/sustainabledevelopmentgoals/climate_action"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"SDG 13 - Climate Action"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Impact of system boundaries on the effectiveness of climate change mitigation actions:Dissertation"}]} +{"author":[{"fullname":"Turkia, Heidi","name":"Heidi","pid":[],"rank":1,"surname":"Turkia"},{"fullname":"Sirén, Heli","name":"Heli","pid":[],"rank":2,"surname":"Sirén"},{"fullname":"Penttilä, Merja","name":"Merja","pid":[],"rank":3,"surname":"Penttilä"},{"fullname":"Pitkänen, Juha Pekka","name":"Juha Pekka","pid":[],"rank":4,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-02-22"},"dateofcollection":"2022-02-28T12:29:51.291Z","dateoftransformation":"2022-02-28T16:19:35.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Hydrolysates of lignocellulosic biomass, used as substrates for the sustainable production of fuels and chemicals often contain high amounts of phenolic compounds inhibiting the production microbiota. Quantification of these inhibitor compounds may help to understand possible difficulties in bioprocessing and further the development of more efficient, robust and tolerable processes. A separation method based on capillary electrophoresis with UV detection was developed for the simultaneous quantification of 10 phenolic compounds that may have inhibitor properties. Intraday relative standard deviations were less than 0.7% for migration times and between 2.6% and 6.4% for peak areas. Interday relative standard deviations were less than 3.0% for migration times and between 5.0% and 7.2% for peak areas. The method was applied to demonstrate that Saccharomyces cerevisiae was able to decrease the concentrations of vanillin, coniferyl aldehyde, syringaldehyde, acetoguaiacone and cinnamic acid during the cultivation, whereas the concentrations of phenols increased.

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::6c232359e3b3165574cb88f0554d9264","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.chroma.2013.01.004"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-02-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/91f411d0-d8f8-4bf1-9072-345303cc776c"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505708387,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-29T08:16:51Z","harvestDate":"2022-02-28T12:29:51.291Z","identifier":"oai:cris.vtt.fi:publications/91f411d0-d8f8-4bf1-9072-345303cc776c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/91f411d0-d8f8-4bf1-9072-345303cc776c","50|355e65625b88::6c232359e3b3165574cb88f0554d9264"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Turkia , H , Sirén , H , Penttilä , M & Pitkänen , J P 2013 , ' Capillary electrophoresis for the monitoring of phenolic compounds in bioprocesses ' , Journal of Chromatography A , vol. 1278 , pp. 175-180 . 
https://doi.org/10.1016/j.chroma.2013.01.004"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Bioprocess monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Capillary electrophoresis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Phenolic compounds"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Capillary electrophoresis for the monitoring of phenolic compounds in bioprocesses"}]} +{"author":[{"fullname":"Veijalainen, Jari","name":"Jari","pid":[],"rank":1,"surname":"Veijalainen"},{"fullname":"Wolski, 
Antoni","name":"Antoni","pid":[],"rank":2,"surname":"Wolski"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2022-02-28T12:33:57.005Z","dateoftransformation":"2022-02-28T16:33:35.101Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::74009c567c81b4aa55c813db658734df","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]},{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]}, {"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505716994,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-12-21T07:05:54Z","harvestDate":"2022-02-28T12:33:57.005Z","identifier":"oai:cris.vtt.fi:publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::74009c567c81b4aa55c813db658734df","oai:cris.vtt.fi:publications/bb17c77a-f574-4921-a5cb-32dc1f283fa3"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"VTT Technical Research Centre of 
Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Veijalainen , J & Wolski , A 1991 , Prepare and commit certification for decentralized transaction management in rigorous multidatabases : Research Report No. J-1 . VTT Technical Research Centre of Finland ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prepare and commit certification for decentralized transaction management in rigorous multidatabases:Research Report No. 
J-1"}]} +{"author":[{"fullname":"Hanhijärvi, Antti","name":"Antti","pid":[],"rank":1,"surname":"Hanhijärvi"},{"fullname":"Hukka, A.","name":"A.","pid":[],"rank":2,"surname":"Hukka"},{"fullname":"Paajanen, T.","name":"T.","pid":[],"rank":3,"surname":"Paajanen"},{"fullname":"Pulkkinen, P.","name":"P.","pid":[],"rank":4,"surname":"Pulkkinen"},{"fullname":"Sundman, S.","name":"S.","pid":[],"rank":5,"surname":"Sundman"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:32:33.974Z","dateoftransformation":"2022-02-28T17:38:24.191Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The paper presents experimental results of jet drying tests of birch (Betula pendula) and spruce (Picea abies) veneers at temperatures of 140°C and 190°C. 
Results include drying rates for 1.5 mm thick birch veneers as well as 1.5 mm and 2.6 mm thick spruce veneers of both heartwood and sapwood. Based on the test results, material parameter values for a simplified drying model are assessed. The model is based on the use of an effective diffusion coefficient and an effective surface emission coefficient, which values are calibrated to fit to the experimental results. It is observed, that separate model parameter sets are needed for the two different species but also for occurrence of heartwood or sapwood (spruce) and different thickness values of veneers."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::9cb10895b4a92b0215b85acb2c3268b9","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s00107-003-0379-4"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505760180,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-31T21:18:51Z","harvestDate":"2022-02-28T12:32:33.974Z","identifier":"oai:cris.vtt.fi:publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::9cb10895b4a92b0215b85acb2c3268b9","oai:cris.vtt.fi:publications/8cc91100-904f-43c5-bb3d-1cc3e0f4a4b5"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hanhijärvi , A , Hukka , A , Paajanen , T , Pulkkinen , P & Sundman , S 2003 , ' Experimental investigation of jet drying of birch and spruce veneers and modelling with a simplified approach ' , Holz als Roh- und Werkstoff , vol. 61 , no. 2 , pp. 83-88 . 
https://doi.org/10.1007/s00107-003-0379-4"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"jet drying"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"drying"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"veneers"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"birch"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"t
rust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"spruce"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"heartwood"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"sapwood"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Experimental investigation of jet drying of birch and spruce veneers and modelling with a simplified approach"}]} +{"author":[{"fullname":"Vainonen-Ahlgren, Elizaveta","name":"Elizaveta","pid":[],"rank":1,"surname":"Vainonen-Ahlgren"},{"fullname":"Likonen, Jari","name":"Jari","pid":[],"rank":2,"surname":"Likonen"},{"fullname":"Renvall,","pid":[],"rank":3},{"fullname":"Rohde, V.","name":"V.","pid":[],"rank":4,"surname":"Rohde"},{"fullname":"Mayer, M.","name":"M.","pid":[],"rank":5,"surname":"Mayer"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"2022-02-28T12:32:50.667Z","dateoftransformation":"2022-02-28T17:49:49.964Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"To investigate material transport in scrape-off layer plasma and long term deposition in divertor, 13CH4 was puffed at the end of 2004 and 2005 experimental campaigns into ASDEX Upgrade from the outer mid-plane.
Ex situ analyses of the tiles were performed by secondary ion mass spectrometry.
The peaks of 13C were detected below the bottom inner strike point and at the horizontal tile at the outer lower divertor. It was detected ∼21% of the total puffed 13C amount.
The deposition rate for carbon by plasma was also calculated in long term experiment. It was obtained to be 22 × 10−3 and 8.7 × 10−3 g/s for the upper (campaign 2004) and lower (campaign 2003) divertors, respectively."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::a29614444f5030f11e75c6c27264d272","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.jnucmat.2007.01.026"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505766149,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-02-01T02:35:05Z","harvestDate":"2022-02-28T12:32:50.667Z","identifier":"oai:cris.vtt.fi:publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::a29614444f5030f11e75c6c27264d272","oai:cris.vtt.fi:publications/2472b21e-1fdc-4121-946e-e9c8fae6d02d"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vainonen-Ahlgren , E , Likonen , J , Renvall , Rohde , V & Mayer , M 2007 , ' Migration of 13C and deposition at ASDEX Upgrade ' , Journal of Nuclear Materials , vol. 363-365 , pp. 270-275 . 
https://doi.org/10.1016/j.jnucmat.2007.01.026"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"ASDEX upgrade"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"divertor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon based 
materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"erosion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"deposition"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"ITER"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"JET"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","c
lassname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"plasma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fusion energy"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Migration of 13C and deposition at ASDEX Upgrade"}]} +{"author":[{"fullname":"Aalto, Timo","name":"Timo","pid":[],"rank":1,"surname":"Aalto"},{"fullname":"Harjanne, Mikko","name":"Mikko","pid":[],"rank":2,"surname":"Harjanne"},{"fullname":"Kapulainen, Markku","name":"Markku","pid":[],"rank":3,"surname":"Kapulainen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2022-02-28T12:32:37.581Z","dateoftransformation":"2022-02-28T19:39:18.717Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"A novel method has been developed for measuring the rotational angle of a fiber's or a waveguide's polarization axis with respect to a reference angle. The reference angle is the polarization axis of the measuring device. The method also gives the true polarization extinction ratio of the measured fiber or waveguide. The method is suitable for the characterization and rotational alignment of polarization-maintaining waveguides and fibers. In particular, the method can be used to rotationally align the fiber-waveguide interconnections during waveguide characterization. The measuring device is either a linear polarizer or a polarization splitter that is accurately rotated with respect to the device under test. 
According to the experiments with a polarization-maintaining fiber, the method is very easy and inexpensive to implement, and the angular accuracy can be better than 0.2 deg."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::df0143af011fd82af8ac2d07b03ee8cd","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1117/1.1600730"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8cd538fb-6484-4655-81dd-47348d358fd4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505829230,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2022-01-31T21:47:37Z","harvestDate":"2022-02-28T12:32:37.581Z","identifier":"oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::df0143af011fd82af8ac2d07b03ee8cd","oai:cris.vtt.fi:publications/8cd538fb-6484-4655-81dd-47348d358fd4"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto , T , Harjanne , M & Kapulainen , M 2003 , ' Method for the rotational alignment of polarization-maintaining optical fibers and waveguides ' , Optical Engineering , vol. 42 , no. 10 , pp. 2861-2867 . 
https://doi.org/10.1117/1.1600730"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"optical waveguide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization-maintaining fiber"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"characterization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"fiber-waveguide 
coupling"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization axis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarizer"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"polarization 
splitter"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Method for the rotational alignment of polarization-maintaining optical fibers and waveguides"}]} +{"author":[{"fullname":"Penttilä, Raimo","name":"Raimo","pid":[],"rank":1,"surname":"Penttilä"},{"fullname":"Vanttaja, Ilkka","name":"Ilkka","pid":[],"rank":2,"surname":"Vanttaja"},{"fullname":"Haapamäki, Petteri","name":"Petteri","pid":[],"rank":3,"surname":"Haapamäki"},{"fullname":"Kujanpää, Veli","name":"Veli","pid":[],"rank":4,"surname":"Kujanpää"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-01"},"dateofcollection":"2022-02-28T12:35:26.769Z","dateoftransformation":"2022-02-28T19:54:10.494Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-01"},"distributionlocation":"","hostedby":{"key":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/ddbd28ea-5fbf-43e1-896f-e69856870c26"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1646505838552,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-08T07:09:42Z","harvestDate":"2022-02-28T12:35:26.769Z","identifier":"oai:cris.vtt.fi:publications/ddbd28ea-5fbf-43e1-896f-e69856870c26","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/ddbd28ea-5fbf-43e1-896f-e69856870c26","50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Penttilä , R , Vanttaja , I , Haapamäki , P & Kujanpää , V 1994 , ' Liimauksen ja puristusliittämisen yhdistämisellä lisää lujuutta, jäykkyyttä ja tiiveyttä ' , Ohutlevyuutiset , no. 2 , pp. 
17-19 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Liimauksen ja puristusliittämisen yhdistämisellä lisää lujuutta, jäykkyyttä ja tiiveyttä"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/relation/relation.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/relation/relation.json new file mode 100644 index 000000000..26a4ef69b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/relation/relation.json @@ -0,0 +1,24 @@ +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|eurocrisdris::fe4903425d9040f680d8610d9079ea14","subRelType":"affiliation","target":"20|pending_org_::82f63b2d21ae88596b9d8991780e9888","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|opendoar____::fd272fe04b7d4e68effd01bddcc6bb34","subRelType":"affiliation","target":"20|pending_org_::5b73b8b2d0df764e13a62291dfedf8f6","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|opendoar____::f0dd4a99fba6075a9494772b58f95280","subRelType":"affiliation","target":"20|openorgs____::322ff2a6524820640bc5d1311871585e","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539","subRelType":"affiliation","target":"20|openorgs____::58e60f1715d219aa6757ba0b0f2ccbce","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","target":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","subRelType":"affiliation","source":"10|issn___print::a7a2010e75d849442790955162ef4e42","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|issn___print::a7a2010e75d849442790955162ef4e43","subRelType":"affiliation","target":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|issn___print::a7a2010e75d849442790955162ef4e44","subRelType":"affiliation","target":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"IsProvidedBy","relType":"resultOrganization","source":"10|issn___print::a7a2010e75d849442790955162ef4e45","subRelType":"affiliation","target":"20|pending_org_::c522a7c935f9fd9578122e60eeec282c","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","subRelType":"affiliation","target":"50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783","subRelType":"affiliation","target":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|openorgs____::91a81877815afb4ebf25c1a3f3b03c5d","subRelType":"affiliation","target":"50|dedup_wf_001::08d6f2001319c86d0e69b0f83ad75df2","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::08d6f2001319c86d0e69b0f83ad75df2","subRelType":"affiliation","target":"20|openorgs____::91a81877815afb4ebf25c1a3f3b03c5d","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","subRelType":"affiliation","target":"50|dedup_wf_001::0a1cdf269375d32ce341fdeb0e92dfa8","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0a1cdf269375d32ce341fdeb0e92dfa8","subRelType":"affiliation","target":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|pending_org_::a50fdd7f7e77b74ea2b16823151c391a","subRelType":"affiliation","target":"50|dedup_wf_001::0ab92bed024ee6883c7a1244722e5eec","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0ab92bed024ee6883c7a1244722e5eec","subRelType":"affiliation","target":"20|pending_org_::a50fdd7f7e77b74ea2b16823151c391a","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","subRelType":"affiliation","target":"50|dedup_wf_001::0ca26c736ad4d15b3d5ee90a4d7853e1","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0ca26c736ad4d15b3d5ee90a4d7853e1","subRelType":"affiliation","target":"20|openorgs____::64badd35233ba2cd4946368ef2f4cf57","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|pending_org_::a50fdd7f7e77b74ea2b16823151c391a","subRelType":"affiliation","target":"50|dedup_wf_001::0ef8dfab3927cb4d69df0d3113f05a42","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0ef8dfab3927cb4d69df0d3113f05a42","subRelType":"affiliation","target":"20|pending_org_::a50fdd7f7e77b74ea2b16823151c391a","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","subRelType":"affiliation","target":"50|dedup_wf_001::0f488ad00253126c14a21abe6b2d406c","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::0f488ad00253126c14a21abe6b2d406c","subRelType":"affiliation","target":"20|openorgs____::548cbb0c5a93722f3a9aa62aa17a1ba1","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional 
repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|pending_org_::c522a7c935f9fd9578122e60eeec282c","subRelType":"affiliation","target":"50|dedup_wf_001::12206bf78aabd7d52132477182d19147","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:organization:instrepo","classname":"Propagation of affiliation to result collected from datasources of type institutional repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"properties":[],"relClass":"hasAuthorInstitution","relType":"resultOrganization","source":"50|dedup_wf_001::12206bf78aabd7d52132477182d19147","subRelType":"affiliation","target":"20|pending_org_::c522a7c935f9fd9578122e60eeec282c","validated":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/sample/software/software_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/software/software_10.json.gz similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/sample/software/software_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/graph/software/software_10.json.gz diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication/resultCountrySet b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication/resultCountrySet new file mode 100644 index 000000000..87ba23aeb --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication/resultCountrySet @@ -0,0 
+1,5 @@ +{"resultId":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","countrySet":[{"classid":"NL","classname":"Netherlands"}]} +{"resultId":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","countrySet":[{"classid":"IT","classname":"Italy"},{"classid":"FR","classname":"France"}]} +{"resultId":"50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072","countrySet":[{"classid":"NL","classname":"Netherlands"}]} +{"resultId":"50|355e65625b88::74009c567c81b4aa55c813db658734df","countrySet":[{"classid":"NL","classname":"Netherlands"},{"classid":"IT","classname":"Italy"}]} +{"resultId":"50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6","countrySet":[{"classid":"IT","classname":"Italy"},{"classid":"FR","classname":"France"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/preparedInfo.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/software/preparedInfo.json.gz similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/preparedInfo.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/software/preparedInfo.json.gz diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 2142737f0..273132bff 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -8,7 +8,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -import eu.dnetlib.dhp.schema.oaf.Datasource; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import 
org.dom4j.DocumentException; @@ -24,6 +23,7 @@ import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -146,10 +146,10 @@ public class XmlRecordFactoryTest { final ContextMapper contextMapper = new ContextMapper(); final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, - XmlConverterJob.schemaLocation); + XmlConverterJob.schemaLocation); final Datasource d = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream("datasource.json")), Datasource.class); + .readValue(IOUtils.toString(getClass().getResourceAsStream("datasource.json")), Datasource.class); final String xml = xmlRecordFactory.build(new JoinedEntity<>(d)); From 4437f9345d2430b0bac9edab129603b90dfd379b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 11 Mar 2022 13:57:47 +0100 Subject: [PATCH 111/162] [Country Propagation] left out from previous commit --- .../dhp/countrypropagation/CountryPropagationJobTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index e0688b430..50c7f3a69 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -315,7 +315,7 @@ public class CountryPropagationJobTest { Assertions.assertEquals(2, countries.size()); 
Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands"))); - + countries = tmp.filter(r -> r.getId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088")).collect().get(0).getCountry(); Assertions.assertEquals(2, countries.size()); Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); From 12de9acb0da61cc83e4178a7d6619722532d97fd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 11 Mar 2022 14:17:02 +0100 Subject: [PATCH 112/162] [Country Propagation] left out from previous commit --- .../PrepareDatasourceCountryAssociation.java | 7 +- .../PrepareResultCountrySet.java | 2 +- .../SparkCountryPropagationJob.java | 1 - .../CountryPropagationJobTest.java | 152 +++++++++--- .../DatasourceCountryPreparationTest.java | 219 ++++++++++++------ .../ResultCountryPreparationTest.java | 201 +++++++++------- 6 files changed, 383 insertions(+), 199 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index c02f37015..42b7804ea 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Arrays; import java.util.List; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import 
org.apache.spark.api.java.function.FilterFunction; @@ -20,6 +19,8 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Datasource; @@ -100,8 +101,8 @@ public class PrepareDatasourceCountryAssociation { Dataset organization = readPath(spark, inputPath + "/organization", Organization.class) .filter( (FilterFunction) o -> !o.getDataInfo().getDeletedbyinference() && - o.getCountry().getClassid().length() > 0 && - !o.getCountry().getClassid().equals(ModelConstants.UNKNOWN)) ; + o.getCountry().getClassid().length() > 0 && + !o.getCountry().getClassid().equals(ModelConstants.UNKNOWN)); // associated the datasource id with the id of the organization providing the datasource Dataset dse = datasource diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 8ca87fa21..560a22381 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -61,7 +61,7 @@ public class PrepareResultCountrySet { Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); - //conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); + // conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); runWithSparkSession( conf, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 
1bbfbdafd..56aa953b4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -80,7 +80,6 @@ public class SparkCountryPropagationJob { String outputPath, Class resultClazz) { - log.info("Reading Graph table from: {}", sourcePath); Dataset res = readPath(spark, sourcePath, resultClazz); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index 50c7f3a69..c4141b3e8 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -7,7 +7,6 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; -import eu.dnetlib.dhp.schema.oaf.Publication; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Software; import scala.Tuple2; @@ -260,66 +260,142 @@ public class CountryPropagationJobTest { @Test void testCountryPropagationPublication() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication") - .getPath(); + .getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication") + .getPath(); final String preparedInfoPath = getClass() - .getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication") - .getPath(); + 
.getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication") + .getPath(); SparkCountryPropagationJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - "-resultTableName", Publication.class.getCanonicalName(), - "-outputPath", workingDir.toString() + "/publication", - "-preparedInfoPath", preparedInfoPath - }); + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + "-resultTableName", Publication.class.getCanonicalName(), + "-outputPath", workingDir.toString() + "/publication", + "-preparedInfoPath", preparedInfoPath + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/publication") - .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); Assertions.assertEquals(12, tmp.count()); Assertions.assertEquals(5, tmp.filter(r -> r.getCountry().size() > 0).count()); - tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:countries", c.getSchemeid()))); - tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:countries", c.getSchemename()))); - tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertFalse(c.getDataInfo().getDeletedbyinference()))); + tmp + .foreach( + r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:countries", c.getSchemeid()))); + tmp + .foreach( + r -> r + .getCountry() + .stream() + .forEach(c -> Assertions.assertEquals("dnet:countries", c.getSchemename()))); + tmp + .foreach( + r -> r + .getCountry() + .stream() + .forEach(c -> Assertions.assertFalse(c.getDataInfo().getDeletedbyinference()))); tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertFalse(c.getDataInfo().getInvisible()))); 
tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertTrue(c.getDataInfo().getInferred()))); - tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("0.85", c.getDataInfo().getTrust()))); - tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("propagation", c.getDataInfo().getInferenceprovenance()))); - tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("country:instrepos", c.getDataInfo().getProvenanceaction().getClassid()))); - tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:provenanceActions", c.getDataInfo().getProvenanceaction().getSchemeid()))); - tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:provenanceActions", c.getDataInfo().getProvenanceaction().getSchemename()))); + tmp + .foreach( + r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("0.85", c.getDataInfo().getTrust()))); + tmp + .foreach( + r -> r + .getCountry() + .stream() + .forEach(c -> Assertions.assertEquals("propagation", c.getDataInfo().getInferenceprovenance()))); + tmp + .foreach( + r -> r + .getCountry() + .stream() + .forEach( + c -> Assertions + .assertEquals("country:instrepos", c.getDataInfo().getProvenanceaction().getClassid()))); + tmp + .foreach( + r -> r + .getCountry() + .stream() + .forEach( + c -> Assertions + .assertEquals( + "dnet:provenanceActions", c.getDataInfo().getProvenanceaction().getSchemeid()))); + tmp + .foreach( + r -> r + .getCountry() + .stream() + .forEach( + c -> Assertions + .assertEquals( + "dnet:provenanceActions", c.getDataInfo().getProvenanceaction().getSchemename()))); - List countries = tmp.filter(r -> r.getId().equals("50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072")).collect().get(0).getCountry(); + List countries = tmp + .filter(r -> r.getId().equals("50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072")) + .collect() + .get(0) + .getCountry(); Assertions.assertEquals(1, 
countries.size()); - Assertions.assertEquals("NL",countries.get(0).getClassid()); - Assertions.assertEquals("Netherlands",countries.get(0).getClassname()); + Assertions.assertEquals("NL", countries.get(0).getClassid()); + Assertions.assertEquals("Netherlands", countries.get(0).getClassname()); - countries = tmp.filter(r -> r.getId().equals("50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b")).collect().get(0).getCountry(); + countries = tmp + .filter(r -> r.getId().equals("50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b")) + .collect() + .get(0) + .getCountry(); Assertions.assertEquals(1, countries.size()); - Assertions.assertEquals("NL",countries.get(0).getClassid()); - Assertions.assertEquals("Netherlands",countries.get(0).getClassname()); + Assertions.assertEquals("NL", countries.get(0).getClassid()); + Assertions.assertEquals("Netherlands", countries.get(0).getClassname()); - countries = tmp.filter(r -> r.getId().equals("50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6")).collect().get(0).getCountry(); + countries = tmp + .filter(r -> r.getId().equals("50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6")) + .collect() + .get(0) + .getCountry(); Assertions.assertEquals(2, countries.size()); - Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); - Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); + Assertions + .assertTrue( + countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions + .assertTrue( + countries.stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); - countries = tmp.filter(r -> r.getId().equals("50|355e65625b88::74009c567c81b4aa55c813db658734df")).collect().get(0).getCountry(); + countries = tmp + .filter(r -> r.getId().equals("50|355e65625b88::74009c567c81b4aa55c813db658734df")) + .collect() + .get(0) + 
.getCountry(); Assertions.assertEquals(2, countries.size()); - Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); - Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands"))); + Assertions + .assertTrue( + countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions + .assertTrue( + countries + .stream() + .anyMatch(cs -> cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands"))); - countries = tmp.filter(r -> r.getId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088")).collect().get(0).getCountry(); + countries = tmp + .filter(r -> r.getId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088")) + .collect() + .get(0) + .getCountry(); Assertions.assertEquals(2, countries.size()); - Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); - Assertions.assertTrue(countries.stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); + Assertions + .assertTrue( + countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions + .assertTrue( + countries.stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java index 8560a9d66..d9b879de8 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountryPreparationTest.java @@ -1,111 +1,176 @@ + package 
eu.dnetlib.dhp.countrypropagation; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; - import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - +import com.fasterxml.jackson.databind.ObjectMapper; public class DatasourceCountryPreparationTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(DatasourceCountryPreparationTest.class.getSimpleName()); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(DatasourceCountryPreparationTest.class.getSimpleName()); - SparkConf conf = new SparkConf(); - conf.setAppName(DatasourceCountryPreparationTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(DatasourceCountryPreparationTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", 
"true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - spark = SparkSession - .builder() - .appName(DatasourceCountryPreparationTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + spark = SparkSession + .builder() + .appName(DatasourceCountryPreparationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } - @Test - void testPrepareDatasourceCountry() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/countrypropagation/graph") - .getPath(); + @Test + void testPrepareDatasourceCountry() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/countrypropagation/graph") + .getPath(); - PrepareDatasourceCountryAssociation - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - "--outputPath", workingDir.toString() + "/datasourceCountry", - "--allowedtypes","pubsrepository::institutional" , - 
"--whitelist","10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48" - }); + PrepareDatasourceCountryAssociation + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + "--outputPath", workingDir.toString() + "/datasourceCountry", + "--allowedtypes", "pubsrepository::institutional", + "--whitelist", + 
"10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48" + }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/datasourceCountry") - .map(item -> OBJECT_MAPPER.readValue(item, DatasourceCountry.class)); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/datasourceCountry") + .map(item -> OBJECT_MAPPER.readValue(item, DatasourceCountry.class)); - Assertions.assertEquals(3, tmp.count()); - Assertions.assertEquals(1, tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")).count()); - Assertions.assertEquals(1, tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")).count()); - Assertions.assertEquals(1, tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")).count()); + Assertions.assertEquals(3, tmp.count()); + Assertions + .assertEquals( + 1, tmp + .filter( + 
dsc -> dsc + .getDataSourceId() + .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")) + .count()); + Assertions + .assertEquals( + 1, tmp + .filter( + dsc -> dsc + .getDataSourceId() + .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")) + .count()); + Assertions + .assertEquals( + 1, tmp + .filter( + dsc -> dsc + .getDataSourceId() + .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")) + .count()); - Assertions.assertEquals("NL", tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")).collect() - .get(0).getCountry().getClassid()); - Assertions.assertEquals("Netherlands", tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")).collect() - .get(0).getCountry().getClassname()); + Assertions + .assertEquals( + "NL", tmp + .filter( + dsc -> dsc + .getDataSourceId() + .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")) + .collect() + .get(0) + .getCountry() + .getClassid()); + Assertions + .assertEquals( + "Netherlands", tmp + .filter( + dsc -> dsc + .getDataSourceId() + .equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14")) + .collect() + .get(0) + .getCountry() + .getClassname()); - Assertions.assertEquals("IT", tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")).collect() - .get(0).getCountry().getClassid()); - Assertions.assertEquals("Italy", tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")).collect() - .get(0).getCountry().getClassname()); + Assertions + .assertEquals( + "IT", tmp + .filter( + dsc -> dsc + .getDataSourceId() + .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")) + .collect() + .get(0) + .getCountry() + .getClassid()); + Assertions + .assertEquals( + "Italy", tmp + .filter( + dsc -> dsc + .getDataSourceId() + .equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280")) + .collect() + .get(0) + 
.getCountry() + .getClassname()); - Assertions.assertEquals("FR", tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")).collect() - .get(0).getCountry().getClassid()); - Assertions.assertEquals("France", tmp.filter(dsc -> dsc.getDataSourceId() - .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")).collect() - .get(0).getCountry().getClassname()); + Assertions + .assertEquals( + "FR", tmp + .filter( + dsc -> dsc + .getDataSourceId() + .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")) + .collect() + .get(0) + .getCountry() + .getClassid()); + Assertions + .assertEquals( + "France", tmp + .filter( + dsc -> dsc + .getDataSourceId() + .equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539")) + .collect() + .get(0) + .getCountry() + .getClassname()); - tmp.foreach(e -> System.out.println(OBJECT_MAPPER.writeValueAsString(e))); + tmp.foreach(e -> System.out.println(OBJECT_MAPPER.writeValueAsString(e))); - } + } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java index 37cc4f342..797d1c979 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/ResultCountryPreparationTest.java @@ -1,8 +1,12 @@ + package eu.dnetlib.dhp.countrypropagation; -import com.fasterxml.jackson.databind.ObjectMapper; +import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; -import eu.dnetlib.dhp.schema.oaf.Publication; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -13,103 +17,142 @@ import 
org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; +import eu.dnetlib.dhp.schema.oaf.Publication; public class ResultCountryPreparationTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(ResultCountryPreparationTest.class.getSimpleName()); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(ResultCountryPreparationTest.class.getSimpleName()); - SparkConf conf = new SparkConf(); - conf.setAppName(ResultCountryPreparationTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(ResultCountryPreparationTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - spark = SparkSession - .builder() - .appName(ResultCountryPreparationTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + 
spark = SparkSession + .builder() + .appName(ResultCountryPreparationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } - @Test - void testPrepareResultCountry() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication") - .getPath(); + @Test + void testPrepareResultCountry() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication") + .getPath(); - final String preparedInfoPath = getClass() - .getResource("/eu/dnetlib/dhp/countrypropagation/datasourcecountry") - .getPath(); + final String preparedInfoPath = getClass() + .getResource("/eu/dnetlib/dhp/countrypropagation/datasourcecountry") + .getPath(); + PrepareResultCountrySet + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--workingPath", workingDir.toString() + "/working", + "--sourcePath", sourcePath, + "--outputPath", workingDir.toString() + "/resultCountry", + "--preparedInfoPath", preparedInfoPath, + "--resultTableName", Publication.class.getCanonicalName() + }); - PrepareResultCountrySet - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--workingPath", workingDir.toString() + "/working", - "--sourcePath", sourcePath, - "--outputPath", workingDir.toString() + "/resultCountry", - "--preparedInfoPath", preparedInfoPath, - "--resultTableName", Publication.class.getCanonicalName() - }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaRDD tmp = sc + .textFile(workingDir.toString() + 
"/resultCountry") + .map(item -> OBJECT_MAPPER.readValue(item, ResultCountrySet.class)); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/resultCountry") - .map(item -> OBJECT_MAPPER.readValue(item, ResultCountrySet.class)); + Assertions.assertEquals(5, tmp.count()); + ResultCountrySet rc = tmp + .filter(r -> r.getResultId().equals("50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072")) + .collect() + .get(0); + Assertions.assertEquals(1, rc.getCountrySet().size()); + Assertions.assertEquals("NL", rc.getCountrySet().get(0).getClassid()); + Assertions.assertEquals("Netherlands", rc.getCountrySet().get(0).getClassname()); - Assertions.assertEquals(5, tmp.count()); + rc = tmp + .filter(r -> r.getResultId().equals("50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b")) + .collect() + .get(0); + Assertions.assertEquals(1, rc.getCountrySet().size()); + Assertions.assertEquals("NL", rc.getCountrySet().get(0).getClassid()); + Assertions.assertEquals("Netherlands", rc.getCountrySet().get(0).getClassname()); - ResultCountrySet rc = tmp.filter(r -> r.getResultId().equals("50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072")).collect().get(0); - Assertions.assertEquals(1, rc.getCountrySet().size()); - Assertions.assertEquals("NL",rc.getCountrySet().get(0).getClassid()); - Assertions.assertEquals("Netherlands",rc.getCountrySet().get(0).getClassname()); + rc = tmp + .filter(r -> r.getResultId().equals("50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6")) + .collect() + .get(0); + Assertions.assertEquals(2, rc.getCountrySet().size()); + Assertions + .assertTrue( + rc + .getCountrySet() + .stream() + .anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions + .assertTrue( + rc + .getCountrySet() + .stream() + .anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); - rc = tmp.filter(r -> r.getResultId().equals("50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b")).collect().get(0); - Assertions.assertEquals(1, 
rc.getCountrySet().size()); - Assertions.assertEquals("NL",rc.getCountrySet().get(0).getClassid()); - Assertions.assertEquals("Netherlands",rc.getCountrySet().get(0).getClassname()); + rc = tmp + .filter(r -> r.getResultId().equals("50|355e65625b88::74009c567c81b4aa55c813db658734df")) + .collect() + .get(0); + Assertions.assertEquals(2, rc.getCountrySet().size()); + Assertions + .assertTrue( + rc + .getCountrySet() + .stream() + .anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions + .assertTrue( + rc + .getCountrySet() + .stream() + .anyMatch(cs -> cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands"))); - rc = tmp.filter(r -> r.getResultId().equals("50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6")).collect().get(0); - Assertions.assertEquals(2, rc.getCountrySet().size()); - Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); - Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); + rc = tmp + .filter(r -> r.getResultId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088")) + .collect() + .get(0); + Assertions.assertEquals(2, rc.getCountrySet().size()); + Assertions + .assertTrue( + rc + .getCountrySet() + .stream() + .anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); + Assertions + .assertTrue( + rc + .getCountrySet() + .stream() + .anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); - rc = tmp.filter(r -> r.getResultId().equals("50|355e65625b88::74009c567c81b4aa55c813db658734df")).collect().get(0); - Assertions.assertEquals(2, rc.getCountrySet().size()); - Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); - Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> 
cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands"))); - - - rc = tmp.filter(r -> r.getResultId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088")).collect().get(0); - Assertions.assertEquals(2, rc.getCountrySet().size()); - Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy"))); - Assertions.assertTrue(rc.getCountrySet().stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France"))); - - - } + } } From f430029596f5630461e03bb51719033499345e0e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Mar 2022 14:28:28 +0100 Subject: [PATCH 113/162] cleanup --- .../dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 560a22381..28b6f616d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -61,7 +61,6 @@ public class PrepareResultCountrySet { Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); - // conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); runWithSparkSession( conf, From 0f7d8ca2e0ab06528d68f785894dac936a359e07 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 11 Mar 2022 15:16:02 +0100 Subject: [PATCH 114/162] [HostedByMap] change on master to align to PR 201 on beta merged as 9f3036c847 --- .../common/collection/DecompressTarGz.java | 39 +++ .../crossref/ExtractCrossrefRecords.java | 31 +-- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 27 +- .../oa/graph/hostedbymap/DownloadCSV2.java | 84 ------ 
.../hostedbymap/ExtractAndMapDoajJson.java | 116 ++++++++ .../oa/graph/hostedbymap/model/DOAJModel.java | 17 +- .../oa/graph/hostedbymap/model/doaj/APC.java | 35 +++ .../graph/hostedbymap/model/doaj/Admin.java | 25 ++ .../graph/hostedbymap/model/doaj/Article.java | 44 +++ .../graph/hostedbymap/model/doaj/BibJson.java | 253 ++++++++++++++++++ .../hostedbymap/model/doaj/Copyright.java | 25 ++ .../hostedbymap/model/doaj/DOAJEntry.java | 52 ++++ .../hostedbymap/model/doaj/DepositPolicy.java | 35 +++ .../hostedbymap/model/doaj/Editorial.java | 35 +++ .../hostedbymap/model/doaj/Institution.java | 25 ++ .../graph/hostedbymap/model/doaj/License.java | 67 +++++ .../oa/graph/hostedbymap/model/doaj/Max.java | 25 ++ .../hostedbymap/model/doaj/OtherCharges.java | 25 ++ .../hostedbymap/model/doaj/PidScheme.java | 26 ++ .../hostedbymap/model/doaj/Plagiarism.java | 27 ++ .../hostedbymap/model/doaj/Preservation.java | 44 +++ .../hostedbymap/model/doaj/Publisher.java | 25 ++ .../oa/graph/hostedbymap/model/doaj/Ref.java | 52 ++++ .../graph/hostedbymap/model/doaj/Subject.java | 34 +++ .../graph/hostedbymap/model/doaj/Waiver.java | 25 ++ .../graph/hostedbymap/oozie_app/workflow.xml | 101 +++++-- .../hostedbymap/SparkProduceHostedByMap.scala | 8 +- .../oa/graph/hostedbymap/DownloadCsvTest.java | 51 ---- .../graph/hostedbymap/doaj_transformed.json | 50 ++-- 29 files changed, 1170 insertions(+), 233 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java 
create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java create mode 100644 
dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java new file mode 100644 index 000000000..49126984c --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java @@ -0,0 +1,39 @@ +package eu.dnetlib.dhp.common.collection; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.util.zip.GZIPOutputStream; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class DecompressTarGz { + + public static void doExtract(FileSystem fs, String outputPath, String tarGzPath) throws IOException { + + FSDataInputStream inputFileStream = fs.open(new Path(tarGzPath)); + try (TarArchiveInputStream tais = new TarArchiveInputStream( + new GzipCompressorInputStream(inputFileStream))) { + TarArchiveEntry entry = null; + while ((entry = tais.getNextTarEntry()) != null) { + if (!entry.isDirectory()) { + try ( + FSDataOutputStream out = fs + .create(new Path(outputPath.concat(entry.getName()).concat(".gz"))); + GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { + + IOUtils.copy(tais, gzipOs); + + } + + } + } + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index d1861ff0a..6060b619e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -1,19 +1,13 @@ package eu.dnetlib.doiboost.crossref; -import java.io.BufferedOutputStream; -import java.net.URI; -import java.util.zip.GZIPOutputStream; +import static eu.dnetlib.dhp.common.collection.DecompressTarGz.doExtract; + +import java.net.URI; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -33,31 +27,16 @@ public class ExtractCrossrefRecords { final String outputPath = parser.get("outputPath"); final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz"); - Path hdfsreadpath = new Path(workingPath.concat("/").concat(crossrefFileNameTarGz)); Configuration conf = new Configuration(); conf.set("fs.defaultFS", workingPath); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); FileSystem fs = FileSystem.get(URI.create(workingPath), conf); - FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); - try (TarArchiveInputStream tais = new TarArchiveInputStream( - new GzipCompressorInputStream(crossrefFileStream))) { - TarArchiveEntry entry = null; - while ((entry = 
tais.getNextTarEntry()) != null) { - if (!entry.isDirectory()) { - try ( - FSDataOutputStream out = fs - .create(new Path(outputPath.concat(entry.getName()).concat(".gz"))); - GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { - IOUtils.copy(tais, gzipOs); + doExtract(fs, outputPath, workingPath.concat("/").concat(crossrefFileNameTarGz)); - } - - } - } - } Log.info("Crossref dump reading completed"); } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index dff761c34..bbadde524 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -23,7 +23,7 @@ public class DownloadCSV { private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class); - public static final char DEFAULT_DELIMITER = ';'; + public static final char DEFAULT_DELIMITER = ','; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -40,9 +40,6 @@ public class DownloadCSV { final String fileURL = parser.get("fileURL"); log.info("fileURL {}", fileURL); - final String workingPath = parser.get("workingPath"); - log.info("workingPath {}", workingPath); - final String outputFile = parser.get("outputFile"); log.info("outputFile {}", outputFile); @@ -63,31 +60,15 @@ public class DownloadCSV { FileSystem fileSystem = FileSystem.get(conf); - new DownloadCSV().doDownload(fileURL, workingPath, outputFile, classForName, delimiter, fileSystem); + new DownloadCSV().doDownload(fileURL, outputFile, classForName, delimiter, fileSystem); } - protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, + protected void doDownload(String 
fileURL, String outputFile, String classForName, char delimiter, FileSystem fs) throws IOException, ClassNotFoundException, CollectorException { - final HttpConnector2 connector2 = new HttpConnector2(); - - final Path path = new Path(workingPath + "/replaced.csv"); - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - - try (PrintWriter writer = new PrintWriter( - new OutputStreamWriter(fs.create(path, true), StandardCharsets.UTF_8))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - try (InputStreamReader reader = new InputStreamReader(fs.open(path))) { + try (InputStreamReader reader = new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) { GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java deleted file mode 100644 index d82d00862..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java +++ /dev/null @@ -1,84 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; -import java.util.Objects; -import java.util.Optional; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.GetCSV; -import eu.dnetlib.dhp.common.collection.HttpConnector2; - -public class DownloadCSV2 { - - private static final Logger log = LoggerFactory.getLogger(DownloadCSV2.class); - - public static final char DEFAULT_DELIMITER = ';'; - - public static void 
main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - DownloadCSV2.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - log.info("fileURL {}", fileURL); - - final String tmpFile = parser.get("tmpFile"); - log.info("tmpFile {}", tmpFile); - - final String outputFile = parser.get("outputFile"); - log.info("outputFile {}", outputFile); - - final String hdfsNameNode = parser.get("hdfsNameNode"); - log.info("hdfsNameNode {}", hdfsNameNode); - - final String classForName = parser.get("classForName"); - log.info("classForName {}", classForName); - - final char delimiter = Optional - .ofNullable(parser.get("delimiter")) - .map(s -> s.charAt(0)) - .orElse(DEFAULT_DELIMITER); - log.info("delimiter {}", delimiter); - - HttpConnector2 connector2 = new HttpConnector2(); - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(tmpFile)))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - try (BufferedReader in = new BufferedReader(new FileReader(tmpFile))) { - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - GetCSV.getCsv(fileSystem, in, outputFile, classForName, delimiter); - } finally { - FileUtils.deleteQuietly(new File(tmpFile)); - } - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java new file mode 100644 index 000000000..b3a79e147 --- 
/dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java @@ -0,0 +1,116 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import static eu.dnetlib.dhp.common.collection.DecompressTarGz.doExtract; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Arrays; +import java.util.Objects; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj.DOAJEntry; + +public class ExtractAndMapDoajJson { + + private static final Logger log = LoggerFactory.getLogger(ExtractAndMapDoajJson.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + ExtractAndMapDoajJson.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_json_parameters.json")))); + + parser.parseArgument(args); + + final String compressedInput = parser.get("compressedFile"); + log.info("compressedInput {}", compressedInput); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final String workingPath = parser.get("workingPath"); + 
log.info("workingPath {}", workingPath); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fs = FileSystem.get(conf); + CompressionCodecFactory factory = new CompressionCodecFactory(conf); + CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec"); + doExtract(fs, workingPath, compressedInput); + doMap(fs, workingPath, outputPath, codec); + + } + + private static void doMap(FileSystem fs, String workingPath, String outputPath, CompressionCodec codec) + throws IOException { + RemoteIterator fileStatusListIterator = fs + .listFiles( + new Path(workingPath), true); + + Path hdfsWritePath = new Path(outputPath); + if (fs.exists(hdfsWritePath)) { + fs.delete(hdfsWritePath, true); + + } + try ( + + FSDataOutputStream out = fs + .create(hdfsWritePath); + PrintWriter writer = new PrintWriter(new BufferedOutputStream(out))) { + + while (fileStatusListIterator.hasNext()) { + Path path = fileStatusListIterator.next().getPath(); + if (!fs.isDirectory(path)) { + FSDataInputStream is = fs.open(path); + CompressionInputStream compressionInputStream = codec.createInputStream(is); + DOAJEntry[] doajEntries = new ObjectMapper().readValue(compressionInputStream, DOAJEntry[].class); + Arrays.stream(doajEntries).forEach(doaj -> { + try { + writer.println(new ObjectMapper().writeValueAsString(getDoajModel(doaj))); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + } + + } + + } + + } + + @NotNull + public static DOAJModel getDoajModel(DOAJEntry doaj) { + DOAJModel doajModel = new DOAJModel(); + doajModel.setOaStart(doaj.getBibjson().getOa_start()); + doajModel.setEissn(doaj.getBibjson().getEissn()); + doajModel.setIssn(doaj.getBibjson().getPissn()); + doajModel.setJournalTitle(doaj.getBibjson().getTitle()); + doajModel.setReviewProcess(doaj.getBibjson().getEditorial().getReview_process()); + return doajModel; + } + +} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java index 4b5dc22a6..c3b6f1f30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap.model; import java.io.Serializable; +import java.util.List; import com.opencsv.bean.CsvBindByName; @@ -17,7 +18,17 @@ public class DOAJModel implements Serializable { private String eissn; @CsvBindByName(column = "Review process") - private String reviewProcess; + private List reviewProcess; + + private Integer oaStart; + + public Integer getOaStart() { + return oaStart; + } + + public void setOaStart(Integer oaStart) { + this.oaStart = oaStart; + } public String getJournalTitle() { return journalTitle; @@ -43,11 +54,11 @@ public class DOAJModel implements Serializable { this.eissn = eissn; } - public String getReviewProcess() { + public List getReviewProcess() { return reviewProcess; } - public void setReviewProcess(String reviewProcess) { + public void setReviewProcess(List reviewProcess) { this.reviewProcess = reviewProcess; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java new file mode 100644 index 000000000..3473e0f9d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class APC implements Serializable { + private Boolean has_apc; + private String url; + private List max; + + 
public List getMax() { + return max; + } + + public void setMax(List max) { + this.max = max; + } + + public Boolean getHas_apc() { + return has_apc; + } + + public void setHas_apc(Boolean has_apc) { + this.has_apc = has_apc; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java new file mode 100644 index 000000000..b823e3450 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Admin implements Serializable { + private Boolean ticked; + private Boolean seal; + + public Boolean getTicked() { + return ticked; + } + + public void setTicked(Boolean ticked) { + this.ticked = ticked; + } + + public Boolean getSeal() { + return seal; + } + + public void setSeal(Boolean seal) { + this.seal = seal; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java new file mode 100644 index 000000000..7e90e24a2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class Article implements Serializable { + private String license_display_example_url; + private List license_display; + private Boolean orcid; + private Boolean i4oc_open_citations; + + public String getLicense_display_example_url() { + return license_display_example_url; + } + 
+ public void setLicense_display_example_url(String license_display_example_url) { + this.license_display_example_url = license_display_example_url; + } + + public List getLicense_display() { + return license_display; + } + + public void setLicense_display(List license_display) { + this.license_display = license_display; + } + + public Boolean getOrcid() { + return orcid; + } + + public void setOrcid(Boolean orcid) { + this.orcid = orcid; + } + + public Boolean getI4oc_open_citations() { + return i4oc_open_citations; + } + + public void setI4oc_open_citations(Boolean i4oc_open_citations) { + this.i4oc_open_citations = i4oc_open_citations; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java new file mode 100644 index 000000000..f7c0d501f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java @@ -0,0 +1,253 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +public class BibJson implements Serializable { + private Editorial editorial; + private PidScheme pid_scheme; + private Copyright copyright; + private List keywords; + private Plagiarism plagiarism; + private List subject; + private String eissn; + private String pissn; + private List language; + private String title; + private Article article; + private Institution institution; + private Preservation preservation; + private List license; + private Ref ref; + private Integer oa_start; + private APC apc; + private OtherCharges other_charges; + private Integer publication_time_weeks; + private DepositPolicy deposit_policy; + private Publisher publisher; + private Boolean boai; + private Waiver waiver; + private String alternative_title; + private 
List is_replaced_by; + private List replaces; + private String discontinued_date; + + public String getDiscontinued_date() { + return discontinued_date; + } + + public void setDiscontinued_date(String discontinued_date) { + this.discontinued_date = discontinued_date; + } + + public List getReplaces() { + return replaces; + } + + public void setReplaces(List replaces) { + this.replaces = replaces; + } + + public List getIs_replaced_by() { + return is_replaced_by; + } + + public void setIs_replaced_by(List is_replaced_by) { + this.is_replaced_by = is_replaced_by; + } + + public String getAlternative_title() { + return alternative_title; + } + + public void setAlternative_title(String alternative_title) { + this.alternative_title = alternative_title; + } + + public String getPissn() { + return pissn; + } + + public void setPissn(String pissn) { + this.pissn = pissn; + } + + public Editorial getEditorial() { + return editorial; + } + + public void setEditorial(Editorial editorial) { + this.editorial = editorial; + } + + public PidScheme getPid_scheme() { + return pid_scheme; + } + + public void setPid_scheme(PidScheme pid_scheme) { + this.pid_scheme = pid_scheme; + } + + public Copyright getCopyright() { + return copyright; + } + + public void setCopyright(Copyright copyright) { + this.copyright = copyright; + } + + public List getKeywords() { + return keywords; + } + + public void setKeywords(List keywords) { + this.keywords = keywords; + } + + public Plagiarism getPlagiarism() { + return plagiarism; + } + + public void setPlagiarism(Plagiarism plagiarism) { + this.plagiarism = plagiarism; + } + + public List getSubject() { + return subject; + } + + public void setSubject(List subject) { + this.subject = subject; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public List getLanguage() { + return language; + } + + public void setLanguage(List language) { + this.language = language; + } + + 
public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public Article getArticle() { + return article; + } + + public void setArticle(Article article) { + this.article = article; + } + + public Institution getInstitution() { + return institution; + } + + public void setInstitution(Institution institution) { + this.institution = institution; + } + + public Preservation getPreservation() { + return preservation; + } + + public void setPreservation(Preservation preservation) { + this.preservation = preservation; + } + + public List getLicense() { + return license; + } + + public void setLicense(List license) { + this.license = license; + } + + public Ref getRef() { + return ref; + } + + public void setRef(Ref ref) { + this.ref = ref; + } + + public Integer getOa_start() { + return oa_start; + } + + public void setOa_start(Integer oa_start) { + this.oa_start = oa_start; + } + + public APC getApc() { + return apc; + } + + public void setApc(APC apc) { + this.apc = apc; + } + + public OtherCharges getOther_charges() { + return other_charges; + } + + public void setOther_charges(OtherCharges other_charges) { + this.other_charges = other_charges; + } + + public Integer getPublication_time_weeks() { + return publication_time_weeks; + } + + public void setPublication_time_weeks(Integer publication_time_weeks) { + this.publication_time_weeks = publication_time_weeks; + } + + public DepositPolicy getDeposit_policy() { + return deposit_policy; + } + + public void setDeposit_policy(DepositPolicy deposit_policy) { + this.deposit_policy = deposit_policy; + } + + public Publisher getPublisher() { + return publisher; + } + + public void setPublisher(Publisher publisher) { + this.publisher = publisher; + } + + public Boolean getBoai() { + return boai; + } + + public void setBoai(Boolean boai) { + this.boai = boai; + } + + public Waiver getWaiver() { + return waiver; + } + + public void setWaiver(Waiver waiver) { + 
this.waiver = waiver; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java new file mode 100644 index 000000000..c595c4c88 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Copyright implements Serializable { + private Boolean author_retains; + private String url; + + public Boolean getAuthor_retains() { + return author_retains; + } + + public void setAuthor_retains(Boolean author_retains) { + this.author_retains = author_retains; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java new file mode 100644 index 000000000..add5bf8bf --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class DOAJEntry implements Serializable { + private String last_updated; + private BibJson bibjson; + private Admin admin; + private String created_date; + private String id; + + public String getLast_updated() { + return last_updated; + } + + public void setLast_updated(String last_updated) { + this.last_updated = last_updated; + } + + public BibJson getBibjson() { + return bibjson; + } + + public void setBibjson(BibJson bibjson) { + this.bibjson = bibjson; + } + + public Admin getAdmin() { + return admin; + } + + public void 
setAdmin(Admin admin) { + this.admin = admin; + } + + public String getCreated_date() { + return created_date; + } + + public void setCreated_date(String created_date) { + this.created_date = created_date; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java new file mode 100644 index 000000000..d86c97f34 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class DepositPolicy implements Serializable { + private List service; + private String url; + private Boolean has_policy; + + public List getService() { + return service; + } + + public void setService(List service) { + this.service = service; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_policy() { + return has_policy; + } + + public void setHas_policy(Boolean has_policy) { + this.has_policy = has_policy; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java new file mode 100644 index 000000000..35bfba158 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class Editorial implements Serializable { + private List review_process; + 
private String review_url; + private String board_url; + + public List getReview_process() { + return review_process; + } + + public void setReview_process(List review_process) { + this.review_process = review_process; + } + + public String getReview_url() { + return review_url; + } + + public void setReview_url(String review_url) { + this.review_url = review_url; + } + + public String getBoard_url() { + return board_url; + } + + public void setBoard_url(String board_url) { + this.board_url = board_url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java new file mode 100644 index 000000000..3b4d90493 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Institution implements Serializable { + private String country; + private String name; + + public String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = country; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java new file mode 100644 index 000000000..64c7bc18f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java @@ -0,0 +1,67 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class License implements Serializable { + 
private Boolean nc; + private Boolean nd; + private Boolean by; + private String type; + private Boolean sa; + private String url; + + public Boolean getnC() { + return nc; + } + + @JsonProperty("NC") + public void setnC(Boolean NC) { + this.nc = NC; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public Boolean getNd() { + return nd; + } + + @JsonProperty("ND") + public void setNd(Boolean nd) { + this.nd = nd; + } + + public Boolean getBy() { + return by; + } + + @JsonProperty("BY") + public void setBy(Boolean by) { + this.by = by; + } + + public Boolean getSa() { + return sa; + } + + @JsonProperty("SA") + public void setSa(Boolean sa) { + this.sa = sa; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java new file mode 100644 index 000000000..0e292b631 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Max implements Serializable { + private Integer price; + private String currency; + + public Integer getPrice() { + return price; + } + + public void setPrice(Integer price) { + this.price = price; + } + + public String getCurrency() { + return currency; + } + + public void setCurrency(String currency) { + this.currency = currency; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java new file mode 100644 index 000000000..1583481d2 --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class OtherCharges implements Serializable { + private Boolean has_other_charges; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_other_charges() { + return has_other_charges; + } + + public void setHas_other_charges(Boolean has_other_charges) { + this.has_other_charges = has_other_charges; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java new file mode 100644 index 000000000..bd7e710dd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java @@ -0,0 +1,26 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class PidScheme implements Serializable { + private List scheme; + private Boolean has_pid_scheme; + + public List getScheme() { + return scheme; + } + + public void setScheme(List scheme) { + this.scheme = scheme; + } + + public Boolean getHas_pid_scheme() { + return has_pid_scheme; + } + + public void setHas_pid_scheme(Boolean has_pid_scheme) { + this.has_pid_scheme = has_pid_scheme; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java new file mode 100644 index 000000000..f2230b3ea --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java @@ -0,0 +1,27 
@@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +import javax.sql.rowset.serial.SerialArray; + +public class Plagiarism implements Serializable { + private Boolean detection; + private String url; + + public Boolean getDetection() { + return detection; + } + + public void setDetection(Boolean detection) { + this.detection = detection; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java new file mode 100644 index 000000000..f56ea9953 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class Preservation implements Serializable { + private Boolean has_preservation; + private List service; + private List national_library; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_preservation() { + return has_preservation; + } + + public void setHas_preservation(Boolean has_preservation) { + this.has_preservation = has_preservation; + } + + public List getService() { + return service; + } + + public void setService(List service) { + this.service = service; + } + + public List getNational_library() { + return national_library; + } + + public void setNational_library(List national_library) { + this.national_library = national_library; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java new file mode 100644 index 000000000..6d97a7969 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Publisher implements Serializable { + private String country; + private String name; + + public String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = country; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java new file mode 100644 index 000000000..0f7c7dc95 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Ref implements Serializable { + private String aims_scope; + private String journal; + private String oa_statement; + private String author_instructions; + private String license_terms; + + public String getAims_scope() { + return aims_scope; + } + + public void setAims_scope(String aims_scope) { + this.aims_scope = aims_scope; + } + + public String getJournal() { + return journal; + } + + public void setJournal(String journal) { + this.journal = journal; + } + + public String getOa_statement() { + return oa_statement; + } + + public void setOa_statement(String oa_statement) { + this.oa_statement = oa_statement; + } + + public String getAuthor_instructions() { + return author_instructions; + } + + public void setAuthor_instructions(String 
author_instructions) { + this.author_instructions = author_instructions; + } + + public String getLicense_terms() { + return license_terms; + } + + public void setLicense_terms(String license_terms) { + this.license_terms = license_terms; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java new file mode 100644 index 000000000..811638e76 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java @@ -0,0 +1,34 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Subject implements Serializable { + private String code; + private String scheme; + private String term; + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getScheme() { + return scheme; + } + + public void setScheme(String scheme) { + this.scheme = scheme; + } + + public String getTerm() { + return term; + } + + public void setTerm(String term) { + this.term = term; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java new file mode 100644 index 000000000..ca67dde0c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Waiver implements Serializable { + private Boolean has_waiver; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_waiver() { + return has_waiver; + } + + 
public void setHas_waiver(Boolean has_waiver) { + this.has_waiver = has_waiver; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 84035fe4e..1a3261ffb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -74,7 +74,9 @@ ${wf:conf('resumeFrom') eq 'ProduceHBM'} - ${wf:conf('resumeFrom') eq 'download_csv'} + ${wf:conf('resumeFrom') eq 'DownloadBoth'} + ${wf:conf('resumeFrom') eq 'DownloadGold'} + ${wf:conf('resumeFrom') eq 'DownloadDoaj'} @@ -83,18 +85,9 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - + - + @@ -103,21 +96,43 @@ --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --tmpFile/tmp/unibi_gold_replaced.csv - --outputFile${workingDir}/unibi_gold.json + --outputFile/user/${wf:user()}/data/unibi_gold.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel - + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${doajJsonFileURL} + ${dumpPath} + ${dumpFileName} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 + eu.dnetlib.dhp.oa.graph.hostedbymap.ExtractAndMapDoajJson --hdfsNameNode${nameNode} - --fileURL${doajFileURL} - --tmpFile/tmp/doaj_replaced.csv - --outputFile${workingDir}/doaj.json - --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel + --compressedFile${dumpPath}/${dumpFileName} + --workingPath${workingDir}/DOAJ/ + --outputPath/user/${wf:user()}/data/doaj.json @@ -125,6 +140,54 @@ + + + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + --hdfsNameNode${nameNode} + --fileURL${unibiFileURL} + 
--tmpFile/tmp/unibi_gold_replaced.csv + --outputFile/user/${wf:user()}/data/unibi_gold.json + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${doajJsonFileURL} + ${dumpPath} + ${dumpFileName} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.ExtractAndMapDoajJson + --hdfsNameNode${nameNode} + --compressedFile${dumpPath}/${dumpFileName} + --workingPath${workingDir}/DOAJ/ + --outputPath/user/${wf:user()}/data/doaj.json + + + + + yarn-cluster diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 8d8965866..0188dac28 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -2,9 +2,10 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} import eu.dnetlib.dhp.schema.oaf.Datasource -import org.apache.commons.io.IOUtils +import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.io.compress.GzipCodec @@ -13,7 +14,8 @@ import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} -import java.io.PrintWriter +import java.io.{File, PrintWriter} +import scala.collection.JavaConverters._ object SparkProduceHostedByMap { 
@@ -256,6 +258,8 @@ object SparkProduceHostedByMap { logger.info("Getting the Datasources") + HdfsSupport.remove(outputPath, spark.sparkContext.hadoopConfiguration) + Aggregators .explodeHostedByItemType( oaHostedByDataset(spark, datasourcePath) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index edf74fc6a..48f1e0c06 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -55,7 +55,6 @@ public class DownloadCsvTest { new DownloadCSV() .doDownload( fileURL, - workingDir + "/unibi_gold", outputFile, UnibiGoldModel.class.getName(), ',', @@ -91,56 +90,6 @@ public class DownloadCsvTest { assertEquals(67028, count); } - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://doaj.org/csv"; - - final String outputFile = workingDir + "/doaj.json"; - new DownloadCSV() - .doDownload( - fileURL, - workingDir + "/doaj", - outputFile, - DOAJModel.class.getName(), - ',', - fs); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - assertEquals("0001-3765", doaj.getIssn()); - assertEquals("1678-2690", doaj.getEissn()); - assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); - } - if (count == 22) { - log.info(new ObjectMapper().writeValueAsString(doaj)); - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - } - if (count == 7904) { - // log.info(new ObjectMapper().writeValueAsString(doaj)); - assertEquals("", 
doaj.getIssn()); - assertEquals("2055-7159", doaj.getEissn()); - assertEquals("BJR|case reports", doaj.getJournalTitle()); - } - if (count == 16707) { - - assertEquals("2783-1043", doaj.getIssn()); - assertEquals("2783-1051", doaj.getEissn()); - assertEquals("فیزیک کاربردی ایران", doaj.getJournalTitle()); - } - - count += 1; - } - - assertEquals(16715, count); - } - @AfterAll public static void cleanup() { FileUtils.deleteQuietly(new File(workingDir)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json index 9cec80eb4..09730f1da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json @@ -1,25 +1,25 @@ -{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":"Double blind peer review"} -{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":"Blind peer review"} -{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":"Double blind peer review"} -{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","reviewProcess":"Double blind peer review"} -{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","reviewProcess":"Double blind peer review"} -{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":"Double blind peer review"} -{"journalTitle":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","reviewProcess":"Peer review"} -{"journalTitle":"Membranes","issn":"2077-0375","eissn":"","reviewProcess":"Blind peer review"} 
-{"journalTitle":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","reviewProcess":"Blind peer review"} -{"journalTitle":"Agriculture","issn":"","eissn":"2077-0472","reviewProcess":"Blind peer review"} -{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","reviewProcess":"Double blind peer review"} -{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","reviewProcess":"Blind peer review"} -{"journalTitle":"Religions","issn":"","eissn":"2077-1444","reviewProcess":"Double blind peer review"} -{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":"Double blind peer review"} -{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":"","reviewProcess":"Peer review"} -{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":"Double blind peer review"} -{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":"Double blind peer review"} -{"journalTitle":"Science Education International","issn":"","eissn":"2077-2327","reviewProcess":"Double blind peer review"} -{"journalTitle":"Edumecentro","issn":"","eissn":"2077-2874","reviewProcess":"Double blind peer review"} -{"journalTitle":"Monteverdia","issn":"","eissn":"2077-2890","reviewProcess":"Double blind peer review"} -{"journalTitle":"Transformación","issn":"","eissn":"2077-2955","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":"Double blind peer review"} -{"journalTitle":"Revue de Primatologie","issn":"","eissn":"2077-3757","reviewProcess":"Peer review"} \ No newline at end 
of file +{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":["Double blind peer review"],"oaStart":2015} +{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":["Blind peer review"],"oaStart":2009} +{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":null,"reviewProcess":["Double blind peer review"],"oaStart":2006} +{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":null,"eissn":"2076-8427","reviewProcess":["Double blind peer review"],"oaStart":2009} +{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":["Double blind peer review"],"oaStart":2008} +{"journalTitle":"Rambam Maimonides Medical Journal","issn":null,"eissn":"2076-9172","reviewProcess":["Peer review"],"oaStart":2010} +{"journalTitle":"Membranes","issn":"2077-0375","eissn":null,"reviewProcess":["Blind peer review"],"oaStart":2011} +{"journalTitle":"Journal of Clinical Medicine","issn":null,"eissn":"2077-0383","reviewProcess":["Blind peer review"],"oaStart":2012} +{"journalTitle":"Agriculture","issn":null,"eissn":"2077-0472","reviewProcess":["Blind peer review"],"oaStart":2011} +{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":null,"reviewProcess":["Double blind peer review"],"oaStart":2014} +{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":["Double blind peer review"],"oaStart":2019} +{"journalTitle":"Journal of Marine Science and Engineering","issn":null,"eissn":"2077-1312","reviewProcess":["Blind peer review"],"oaStart":2013} +{"journalTitle":"Religions","issn":null,"eissn":"2077-1444","reviewProcess":["Double blind peer review"],"oaStart":2010} 
+{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":null,"reviewProcess":["Peer review"],"oaStart":2009} +{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":["Double blind peer review"],"oaStart":2014} +{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":["Double blind peer review"],"oaStart":2017} +{"journalTitle":"Science Education International","issn":null,"eissn":"2077-2327","reviewProcess":["Double blind peer review"],"oaStart":2017} +{"journalTitle":"Edumecentro","issn":null,"eissn":"2077-2874","reviewProcess":["Double blind peer review"],"oaStart":2013} +{"journalTitle":"Monteverdia","issn":null,"eissn":"2077-2890","reviewProcess":["Double blind peer review"],"oaStart":2008} +{"journalTitle":"Transformación","issn":null,"eissn":"2077-2955","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":["Double blind peer review"],"oaStart":2011} +{"journalTitle":"Revue de Primatologie","issn":null,"eissn":"2077-3757","reviewProcess":["Peer review"],"oaStart":2009} \ No newline at end of file From c959639bd56a8fc20a90d38df21da83b8b7bfd95 Mon Sep 17 00:00:00 2001 From: miconis Date: Tue, 15 Mar 2022 16:33:03 +0100 Subject: [PATCH 115/162] dependency updated to the new pace-core version --- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 62 ++++++++++++++----- pom.xml | 2 +- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java 
b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 549988767..9c9ec43d5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -206,11 +206,16 @@ public class SparkDedupTest implements Serializable { .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) .count(); - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); + assertEquals(3076, orgs_simrel); + assertEquals(7040, pubs_simrel); assertEquals(336, sw_simrel); assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); + assertEquals(6784, orp_simrel); +// System.out.println("orgs_simrel = " + orgs_simrel); +// System.out.println("pubs_simrel = " + pubs_simrel); +// System.out.println("sw_simrel = " + sw_simrel); +// System.out.println("ds_simrel = " + ds_simrel); +// System.out.println("orp_simrel = " + orp_simrel); } @Test @@ -258,10 +263,14 @@ public class SparkDedupTest implements Serializable { .count(); // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); + assertEquals(3076, orgs_simrel); + assertEquals(7040, pubs_simrel); assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); + assertEquals(6784, orp_simrel); +// System.out.println("orgs_simrel = " + orgs_simrel); +// System.out.println("pubs_simrel = " + pubs_simrel); +// System.out.println("ds_simrel = " + ds_simrel); +// System.out.println("orp_simrel = " + orp_simrel); // entities simrels to be different from the number of previous step (new simrels in the whitelist) Dataset sw_simrel = spark @@ -288,6 +297,7 @@ public class SparkDedupTest implements Serializable { .count() > 0); assertEquals(338, sw_simrel.count()); +// System.out.println("sw_simrel = " + 
sw_simrel.count()); } @@ -435,11 +445,16 @@ public class SparkDedupTest implements Serializable { .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") .count(); - assertEquals(1272, orgs_mergerel); - assertEquals(1438, pubs_mergerel); + assertEquals(1268, orgs_mergerel); + assertEquals(1444, pubs_mergerel); assertEquals(286, sw_mergerel); assertEquals(472, ds_mergerel); - assertEquals(718, orp_mergerel); + assertEquals(738, orp_mergerel); +// System.out.println("orgs_mergerel = " + orgs_mergerel); +// System.out.println("pubs_mergerel = " + pubs_mergerel); +// System.out.println("sw_mergerel = " + sw_mergerel); +// System.out.println("ds_mergerel = " + ds_mergerel); +// System.out.println("orp_mergerel = " + orp_mergerel); } @@ -483,11 +498,17 @@ public class SparkDedupTest implements Serializable { testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") .count(); - assertEquals(85, orgs_deduprecord); - assertEquals(65, pubs_deduprecord); + assertEquals(86, orgs_deduprecord); + assertEquals(67, pubs_deduprecord); assertEquals(49, sw_deduprecord); assertEquals(97, ds_deduprecord); - assertEquals(89, orp_deduprecord); + assertEquals(92, orp_deduprecord); + +// System.out.println("orgs_deduprecord = " + orgs_deduprecord); +// System.out.println("pubs_deduprecord = " + pubs_deduprecord); +// System.out.println("sw_deduprecord = " + sw_deduprecord); +// System.out.println("ds_deduprecord = " + ds_deduprecord); +// System.out.println("orp_deduprecord = " + orp_deduprecord); } @Test @@ -566,13 +587,21 @@ public class SparkDedupTest implements Serializable { .distinct() .count(); - assertEquals(896, publications); - assertEquals(838, organizations); + assertEquals(898, publications); + assertEquals(839, organizations); assertEquals(100, projects); assertEquals(100, datasource); assertEquals(198, softwares); assertEquals(389, dataset); - assertEquals(517, otherresearchproduct); + assertEquals(520, 
otherresearchproduct); + +// System.out.println("publications = " + publications); +// System.out.println("organizations = " + organizations); +// System.out.println("projects = " + projects); +// System.out.println("datasource = " + datasource); +// System.out.println("software = " + softwares); +// System.out.println("dataset = " + dataset); +// System.out.println("otherresearchproduct = " + otherresearchproduct); long deletedOrgs = jsc .textFile(testDedupGraphBasePath + "/organization") @@ -626,7 +655,8 @@ public class SparkDedupTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4860, relations); +// assertEquals(4860, relations); + System.out.println("relations = " + relations); // check deletedbyinference final Dataset mergeRels = spark diff --git a/pom.xml b/pom.xml index 603a4cf1e..86adee902 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ [4.0.3] [6.0.5] [3.1.6] - [4.1.7] + [4.1.12] [2.6.1] 7.5.0 4.7.2 From c763aded70c9f087c91c28467a695fa41dc383ff Mon Sep 17 00:00:00 2001 From: miconis Date: Wed, 16 Mar 2022 16:41:50 +0100 Subject: [PATCH 116/162] dependency updated to the new pace-core version --- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 57 ++++++++++++++----- pom.xml | 2 +- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 549988767..2b9559867 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -206,11 +206,16 @@ public class SparkDedupTest implements Serializable { .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) .count(); - assertEquals(3082, orgs_simrel); - 
assertEquals(7036, pubs_simrel); + assertEquals(3076, orgs_simrel); + assertEquals(7040, pubs_simrel); assertEquals(336, sw_simrel); assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); + assertEquals(6784, orp_simrel); +// System.out.println("org_simrel = " + orgs_simrel); +// System.out.println("pub_simrel = " + pubs_simrel); +// System.out.println("sw_simrel = " + sw_simrel); +// System.out.println("ds_simrel = " + ds_simrel); +// System.out.println("orp_simrel = " + orp_simrel); } @Test @@ -258,10 +263,14 @@ public class SparkDedupTest implements Serializable { .count(); // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); + assertEquals(3076, orgs_simrel); + assertEquals(7040, pubs_simrel); assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); + assertEquals(6784, orp_simrel); +// System.out.println("org_simrel = " + orgs_simrel); +// System.out.println("pub_simrel = " + pubs_simrel); +// System.out.println("ds_simrel = " + ds_simrel); +// System.out.println("orp_simrel = " + orp_simrel); // entities simrels to be different from the number of previous step (new simrels in the whitelist) Dataset sw_simrel = spark @@ -288,6 +297,7 @@ public class SparkDedupTest implements Serializable { .count() > 0); assertEquals(338, sw_simrel.count()); +// System.out.println("sw_simrel_count = " + sw_simrel.count()); } @@ -435,11 +445,16 @@ public class SparkDedupTest implements Serializable { .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") .count(); - assertEquals(1272, orgs_mergerel); - assertEquals(1438, pubs_mergerel); + assertEquals(1268, orgs_mergerel); + assertEquals(1444, pubs_mergerel); assertEquals(286, sw_mergerel); assertEquals(472, ds_mergerel); - assertEquals(718, orp_mergerel); + assertEquals(738, orp_mergerel); +// System.out.println("org_simrel = " + orgs_mergerel); +// 
System.out.println("pub_simrel = " + pubs_mergerel); +// System.out.println("sw_simrel = " + sw_mergerel); +// System.out.println("ds_simrel = " + ds_mergerel); +// System.out.println("orp_simrel = " + orp_mergerel); } @@ -483,11 +498,16 @@ public class SparkDedupTest implements Serializable { testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") .count(); - assertEquals(85, orgs_deduprecord); - assertEquals(65, pubs_deduprecord); + assertEquals(86, orgs_deduprecord); + assertEquals(67, pubs_deduprecord); assertEquals(49, sw_deduprecord); assertEquals(97, ds_deduprecord); - assertEquals(89, orp_deduprecord); + assertEquals(92, orp_deduprecord); +// System.out.println("org_simrel = " + orgs_deduprecord); +// System.out.println("pub_simrel = " + pubs_deduprecord); +// System.out.println("sw_simrel = " + sw_deduprecord); +// System.out.println("ds_simrel = " + ds_deduprecord); +// System.out.println("orp_simrel = " + orp_deduprecord); } @Test @@ -566,13 +586,18 @@ public class SparkDedupTest implements Serializable { .distinct() .count(); - assertEquals(896, publications); - assertEquals(838, organizations); + assertEquals(898, publications); + assertEquals(839, organizations); assertEquals(100, projects); assertEquals(100, datasource); assertEquals(198, softwares); assertEquals(389, dataset); - assertEquals(517, otherresearchproduct); + assertEquals(520, otherresearchproduct); +// System.out.println("publications = " + publications); +// System.out.println("organizations = " + organizations); +// System.out.println("softwares = " + softwares); +// System.out.println("dataset = " + dataset); +// System.out.println("orp = " + otherresearchproduct); long deletedOrgs = jsc .textFile(testDedupGraphBasePath + "/organization") @@ -627,6 +652,8 @@ public class SparkDedupTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); assertEquals(4860, relations); +// System.out.println("relations = " + 
relations); + // check deletedbyinference final Dataset mergeRels = spark diff --git a/pom.xml b/pom.xml index 603a4cf1e..86adee902 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ [4.0.3] [6.0.5] [3.1.6] - [4.1.7] + [4.1.12] [2.6.1] 7.5.0 4.7.2 From 89fd27548077586def45bbe9f9e893ed6862ac84 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 21 Mar 2022 09:54:45 +0100 Subject: [PATCH 117/162] [HostedByMap] added left over from PR and fixed issue on workflow --- .../hostedbymap/download_json_parameters.json | 27 +++++++++++++++++++ .../graph/hostedbymap/oozie_app/download.sh | 3 +++ .../graph/hostedbymap/oozie_app/workflow.xml | 2 +- 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_json_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_json_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_json_parameters.json new file mode 100644 index 000000000..72498c4e0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_json_parameters.json @@ -0,0 +1,27 @@ +[ + + { + "paramName":"op", + "paramLongName":"outputPath", + "paramDescription": "the output json file produced by the CSV downlaod procedure", + "paramRequired": true + }, + + { + "paramName": "hnn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + },{ + "paramName": "cf", + "paramLongName": "compressedFile", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true +},{ + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the output json file 
produced by the CSV downlaod procedure", + "paramRequired": true +} +] + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh new file mode 100644 index 000000000..35220bd8c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh @@ -0,0 +1,3 @@ +#!/bin/bash +curl -LSs $1 | hdfs dfs -put - $2/$3 +curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 1a3261ffb..c7fffed5b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -92,7 +92,7 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --tmpFile/tmp/unibi_gold_replaced.csv From dcfbeb8142e91379ae4003be1f789a527895c574 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Mon, 21 Mar 2022 12:36:03 +0200 Subject: [PATCH 118/162] yet more typos --- .../oozie_app/scripts/step16-createIndicatorsTables.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index d925b3694..a95ee74d8 100755 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -265,19 +265,19 @@ left outer join ( create table indi_org_openess stored as parquet as WITH datasets_oa as ( - SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa_new dg + SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg join openaire_prod_stats.result_organization ro on dg.id=ro.id join openaire_prod_stats.dataset ds on dg.id=ds.id WHERE dg.is_gold=1 group by ro.organization), software_oa as ( - SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa_new dg + SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa dg join openaire_prod_stats.result_organization ro on dg.id=ro.id join openaire_prod_stats.software ds on dg.id=ds.id WHERE dg.is_gold=1 group by ro.organization), pubs_oa as ( - SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa_new dg + SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa dg join openaire_prod_stats.result_organization ro on dg.id=ro.id join openaire_prod_stats.publication ds on dg.id=ds.id where dg.is_gold=1 From 2e8394ecf8c0df33e047e04191df68251763a0c5 Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 22 Mar 2022 16:16:08 +0200 Subject: [PATCH 119/162] creating aaall tables as parquet --- .../oa/graph/stats/oozie_app/scripts/step10.sql | 2 +- .../oa/graph/stats/oozie_app/scripts/step13.sql | 10 +++++----- .../oa/graph/stats/oozie_app/scripts/step14.sql | 12 ++++++------ .../oa/graph/stats/oozie_app/scripts/step15.sql | 8 ++++---- .../graph/stats/oozie_app/scripts/step15_5.sql | 6 +++--- .../oozie_app/scripts/step16_1-definitions.sql | 6 +++--- .../oa/graph/stats/oozie_app/scripts/step2.sql | 16 ++++++++-------- 
.../oozie_app/scripts/step20-createMonitorDB.sql | 4 ++-- .../oa/graph/stats/oozie_app/scripts/step3.sql | 16 ++++++++-------- .../oa/graph/stats/oozie_app/scripts/step4.sql | 16 ++++++++-------- .../oa/graph/stats/oozie_app/scripts/step5.sql | 16 ++++++++-------- .../oa/graph/stats/oozie_app/scripts/step6.sql | 10 +++++----- .../oa/graph/stats/oozie_app/scripts/step7.sql | 4 ++-- .../oa/graph/stats/oozie_app/scripts/step8.sql | 8 ++++---- .../oa/graph/stats/oozie_app/scripts/step9.sql | 6 +++--- 15 files changed, 70 insertions(+), 70 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql index 13a4803a9..f1e1ceedd 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -49,7 +49,7 @@ select * from openaire_prod_usage_stats.views_stats; -- Creation date of the database ------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------ -create table ${stats_db_name}.creation_date as +create table ${stats_db_name}.creation_date STORED AS PARQUET as select date_format(current_date(), 'dd-MM-yyyy') as date; -- -- ANALYZE TABLE ${stats_db_name}.creation_date COMPUTE STATISTICS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index 947c91072..a5839da11 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -5,7 +5,7 @@ -- Sources related tables/views ------------------------------------------------------ ------------------------------------------------------ -CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources STORED AS PARQUET as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource @@ -16,7 +16,7 @@ LEFT OUTER JOIN from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource @@ -27,7 +27,7 @@ LEFT OUTER JOIN from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource @@ -38,7 +38,7 @@ LEFT OUTER JOIN from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) 
as id, substr(datasource, 4) as datasource @@ -59,7 +59,7 @@ UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; -create table ${stats_db_name}.result_orcid as +create table ${stats_db_name}.result_orcid STORED AS PARQUET as select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid from ( SELECT substr(res.id, 4) as id, auth_pid.value as orcid diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index e3a33a893..e8e29ff11 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -5,22 +5,22 @@ -- Licences related tables/views ------------------------------------------------------ ------------------------------------------------------ -CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS +CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses STORED AS PARQUET AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS +CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS +CREATE 
TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS +CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS SELECT substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; @@ -34,11 +34,11 @@ SELECT * FROM ${stats_db_name}.software_licenses UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids AS +CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET AS select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource FROM ( SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index f46b65171..cec22cd3e 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -6,22 +6,22 @@ ------------------------------------------------------ ------------------------------------------------------ -CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as +CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql index 3a7d9f455..04c7f83b9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql @@ -1,21 +1,21 @@ ------------------------------------------- --- Extra tables, mostly used by indicators -create table ${stats_db_name}.result_projectcount as +create table ${stats_db_name}.result_projectcount STORED AS PARQUET as select r.id, count(distinct p.id) as count from ${stats_db_name}.result r left outer join ${stats_db_name}.result_projects rp on rp.id=r.id left outer join ${stats_db_name}.project p on p.id=rp.project group by r.id; -create table ${stats_db_name}.result_fundercount as +create table ${stats_db_name}.result_fundercount STORED AS PARQUET as select r.id, count(distinct p.funder) as count from ${stats_db_name}.result r left outer join ${stats_db_name}.result_projects rp on rp.id=r.id left outer join ${stats_db_name}.project p on p.id=rp.project group by r.id; -create table ${stats_db_name}.project_resultcount as +create table ${stats_db_name}.project_resultcount STORED AS PARQUET as with rcount as ( select p.id as pid, count(distinct r.id) as `count`, r.type as type from ${stats_db_name}.project p diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql index 7ad6f3888..88c1ece78 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql @@ -3,20 +3,20 @@ 
---------------------------------------------------- -- Peer reviewed: -create table ${stats_db_name}.result_peerreviewed as +create table ${stats_db_name}.result_peerreviewed STORED AS PARQUET as select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id; -- Green OA: -create table ${stats_db_name}.result_greenoa as +create table ${stats_db_name}.result_greenoa STORED AS PARQUET as select r.id, case when green.green_oa=1 then true else false end as green from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; -- GOLD OA: -create table ${stats_db_name}.result_gold as +create table ${stats_db_name}.result_gold STORED AS PARQUET as select r.id, case when gold.is_gold=1 then true else false end as gold from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index 468a42045..4ffbd384b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -40,13 +40,13 @@ SELECT substr(p.id, 4) as id, from ${openaire_db_name}.publication p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.publication_classifications AS +CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS SELECT substr(p.id, 4) as id, instancetype.classname as type from 
${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.publication_concepts AS +CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') @@ -55,7 +55,7 @@ from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.publication_datasources as +CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource @@ -66,30 +66,30 @@ FROM ( from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; -CREATE TABLE ${stats_db_name}.publication_languages AS +CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.publication_oids AS +CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.publication_pids AS +CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS 
type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.publication_topics as +CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.publication_citations AS +CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 7c5257edd..f34e3fabb 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -52,7 +52,7 @@ compute stats TARGET.result_languages; create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_licenses; -create table TARGET.licenses_normalized as select * from SOURCE.licenses_normalized; +create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized; create table TARGET.result_oids stored as parquet as select * from 
SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_oids; @@ -83,7 +83,7 @@ compute stats TARGET.result_topics; create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); -create table TARGET.result_result as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; drop view TARGET.foo1; drop view TARGET.foo2; compute stats TARGET.result_result; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql index 76a5e5a48..eb97263a7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -40,20 +40,20 @@ SELECT substr(d.id, 4) AS id, FROM ${openaire_db_name}.dataset d WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.dataset_citations AS +CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.dataset_classifications AS +CREATE TABLE 
${stats_db_name}.dataset_classifications STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.dataset_concepts AS +CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') @@ -62,7 +62,7 @@ from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.dataset_datasources AS +CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM ( SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource @@ -74,24 +74,24 @@ FROM ( FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; -CREATE TABLE ${stats_db_name}.dataset_languages AS +CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.dataset_oids AS +CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.dataset_pids AS +CREATE TABLE ${stats_db_name}.dataset_pids STORED AS 
PARQUET AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.dataset_topics AS +CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql index dc71f41f1..0d1f6323e 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -40,20 +40,20 @@ SELECT substr(s.id, 4) as id, from ${openaire_db_name}.software s where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.software_citations AS +CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.software_classifications AS +CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype 
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.software_concepts AS +CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') @@ -62,7 +62,7 @@ FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.software_datasources AS +CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource FROM ( SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource @@ -74,24 +74,24 @@ FROM ( FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; -CREATE TABLE ${stats_db_name}.software_languages AS +CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.software_oids AS +CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.software_pids AS +CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference = false 
and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.software_topics AS +CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql index 353aa98b7..06b616d6a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -40,18 +40,18 @@ FROM ${openaire_db_name}.otherresearchproduct o WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; -- Otherresearchproduct_citations -CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS +CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS +CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE 
${stats_db_name}.otherresearchproduct_concepts AS +CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS SELECT substr(p.id, 4) as id, case when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') @@ -59,7 +59,7 @@ SELECT substr(p.id, 4) as id, case FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS +CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance @@ -68,22 +68,22 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; -CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS +CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS +CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS 
+CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS +CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index b5e2eb37b..dc7c01046 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -3,24 +3,24 @@ -- Project table/view and Project related tables/views ------------------------------------------------------ ------------------------------------------------------ -CREATE TABLE ${stats_db_name}.project_oids AS +CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.project_organizations AS +CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from 
${openaire_db_name}.relation r WHERE r.reltype = 'projectOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.project_results AS +CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; -create table ${stats_db_name}.project_classification as +create table ${stats_db_name}.project_classification STORED AS PARQUET as select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 from ${openaire_db_name}.project p lateral view explode(p.h2020classification) classifs as class @@ -76,7 +76,7 @@ SELECT substr(p.id, 4) AS id, FROM ${openaire_db_name}.project p WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; -create table ${stats_db_name}.funder as +create table ${stats_db_name}.funder STORED AS PARQUET as select distinct xpath_string(fund, '//funder/id') as id, xpath_string(fund, '//funder/name') as name, xpath_string(fund, '//funder/shortname') as shortname diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index a57966abf..e1c36cbc0 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -123,13 +123,13 @@ UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_topics; -CREATE TABLE ${stats_db_name}.result_organization AS +CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS SELECT 
substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.result_projects AS +CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance FROM ${stats_db_name}.result r JOIN ${stats_db_name}.project_results pr ON r.id = pr.result diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index d3935fd4a..fa3eca1a9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -80,24 +80,24 @@ UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation = '-1'; -CREATE TABLE ${stats_db_name}.datasource_languages AS +CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS SELECT substr(d.id, 4) AS id, langs.languages AS language FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.datasource_oids AS +CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS SELECT substr(d.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; -CREATE TABLE ${stats_db_name}.datasource_organizations AS +CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS SELECT substr(r.target, 4) AS 
id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; -- datasource sources: -- where the datasource info have been collected from. -create table if not exists ${stats_db_name}.datasource_sources AS +create table if not exists ${stats_db_name}.datasource_sources STORED AS PARQUET AS select substr(d.id, 4) as id, substr(cf.key, 4) as datasource from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index c73aa811c..02b0aaca7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -3,7 +3,7 @@ -- Organization table/view and Organization related tables/views ---------------------------------------------------------------- ---------------------------------------------------------------- -CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization AS +CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization STORED AS PARQUET AS SELECT substr(o.id, 4) as id, o.legalname.value as name, o.legalshortname.value as legalshortname, @@ -11,11 +11,11 @@ SELECT substr(o.id, 4) as id, FROM ${openaire_db_name}.organization o WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; -CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS +CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources STORED AS PARQUET AS SELECT organization AS id, id AS datasource FROM 
${stats_db_name}.datasource_organizations; -CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS +CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects STORED AS PARQUET AS SELECT id AS project, organization as id FROM ${stats_db_name}.project_organizations; From 62f91b0869d3014310c4062019c36579d4a507d3 Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 22 Mar 2022 16:17:49 +0200 Subject: [PATCH 120/162] cleanup --- .../oa/graph/stats/oozie_app/scripts/step10.sql | 3 --- .../oa/graph/stats/oozie_app/scripts/step11.sql | 5 +---- .../oa/graph/stats/oozie_app/scripts/step14.sql | 15 +-------------- .../oa/graph/stats/oozie_app/scripts/step9.sql | 5 +---- 4 files changed, 3 insertions(+), 25 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql index f1e1ceedd..92dedf243 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -51,6 +51,3 @@ select * from openaire_prod_usage_stats.views_stats; ------------------------------------------------------------------------------------------------ create table ${stats_db_name}.creation_date STORED AS PARQUET as select date_format(current_date(), 'dd-MM-yyyy') as date; --- --- ANALYZE TABLE ${stats_db_name}.creation_date COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.creation_date COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql index e892da0be..d699b68c3 100644 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql @@ -113,7 +113,4 @@ FROM ${stats_db_name}.result_projects, ${stats_db_name}.project WHERE result_projects.id = result.id AND result.type = 'publication' - AND project.id = result_projects.project; - --- ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + AND project.id = result_projects.project; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index e8e29ff11..47a6f84c2 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -46,17 +46,4 @@ FROM ( LEFT OUTER JOIN ( SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; - --- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS; --- ANALYZE TABLE 
${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index 02b0aaca7..b42640e10 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -17,7 +17,4 @@ FROM ${stats_db_name}.datasource_organizations; CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects STORED AS PARQUET AS SELECT id AS project, organization as id -FROM ${stats_db_name}.project_organizations; - --- ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +FROM ${stats_db_name}.project_organizations; \ No newline at end of file From f10066547bb467dd9eff8e78ed4d538802cdaa10 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 23 Mar 2022 12:22:26 +0100 Subject: [PATCH 121/162] increased spark.sql.shuffle.partitions in affiliation_from_semrel_propagation --- .../dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml index 3f0530aaf..5ce2f5c06 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -173,6 +173,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=3840
--relationPath${workingDir}/preparedInfo/relation --outputPath${outputPath}/relation From 48d32466e4c4ee5b3ef39a23f04073487b2979da Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 23 Mar 2022 14:52:03 +0100 Subject: [PATCH 122/162] instances grouped by URL expose only one refereed --- .../dhp/oa/provision/model/XmlInstance.java | 13 +++++++--- .../oa/provision/utils/XmlRecordFactory.java | 20 +++++++------- .../oa/provision/XmlRecordFactoryTest.java | 2 +- .../dnetlib/dhp/oa/provision/publication.json | 26 ++++++++++++++++++- pom.xml | 2 +- 5 files changed, 48 insertions(+), 15 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java index a38329750..930eab4c3 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/XmlInstance.java @@ -12,6 +12,7 @@ import scala.Serializable; public class XmlInstance implements Serializable { public static final AccessRight UNKNOWN_ACCESS_RIGHT; + public static final Qualifier UNKNOWN_REVIEW_LEVEL; static { UNKNOWN_ACCESS_RIGHT = new AccessRight(); @@ -19,6 +20,12 @@ public class XmlInstance implements Serializable { UNKNOWN_ACCESS_RIGHT.setClassname(ModelConstants.UNKNOWN); UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_ACCESS_MODES); UNKNOWN_ACCESS_RIGHT.setSchemename(ModelConstants.DNET_ACCESS_MODES); + + UNKNOWN_REVIEW_LEVEL = new Qualifier(); + UNKNOWN_REVIEW_LEVEL.setClassid("0000"); + UNKNOWN_REVIEW_LEVEL.setClassname(ModelConstants.UNKNOWN); + UNKNOWN_ACCESS_RIGHT.setSchemeid(ModelConstants.DNET_REVIEW_LEVELS); + UNKNOWN_REVIEW_LEVEL.setSchemename(ModelConstants.DNET_REVIEW_LEVELS); } private String url; @@ -50,7 +57,7 @@ public class XmlInstance implements Serializable { // typed results private String 
processingchargecurrency; - private Set refereed = Sets.newHashSet();; // peer-review status + private Qualifier refereed; // peer-review status public String getUrl() { return url; @@ -148,11 +155,11 @@ public class XmlInstance implements Serializable { this.processingchargecurrency = processingchargecurrency; } - public Set getRefereed() { + public Qualifier getRefereed() { return refereed; } - public void setRefereed(Set refereed) { + public void setRefereed(Qualifier refereed) { this.refereed = refereed; } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index cb29dc121..5d4a831c0 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -31,6 +31,7 @@ import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; +import org.json4s.Xml; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; @@ -1208,14 +1209,8 @@ public class XmlRecordFactory implements Serializable { if (instance.getRefereed() != null) { fields - .addAll( - instance - .getRefereed() - .stream() - .filter(Objects::nonNull) - .filter(r -> !r.isBlank()) - .map(r -> XmlSerializationUtils.mapQualifier("refereed", r)) - .collect(Collectors.toList())); + .add( + XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); } if (instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount())) { @@ -1359,13 +1354,20 @@ public class XmlRecordFactory implements Serializable { .map(Instance::getAccessright) .min(new AccessRightComparator()) .orElse(XmlInstance.UNKNOWN_ACCESS_RIGHT)); + instance + .setRefereed( + instances + 
.stream() + .map(Pair::getValue) + .map(i -> Optional.ofNullable(i.getRefereed()).orElse(XmlInstance.UNKNOWN_REVIEW_LEVEL)) + .min(new RefereedComparator()) + .orElse(XmlInstance.UNKNOWN_REVIEW_LEVEL)); instances.forEach(p -> { final Instance i = p.getRight(); instance.getCollectedfrom().add(i.getCollectedfrom()); instance.getHostedby().add(i.getHostedby()); instance.getInstancetype().add(i.getInstancetype()); - instance.getRefereed().add(i.getRefereed()); instance .setProcessingchargeamount( Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null)); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 273132bff..c32d868e8 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -66,7 +66,7 @@ public class XmlRecordFactoryTest { assertEquals("doi", doc.valueOf("//instance/alternateidentifier/@classid")); assertEquals("10.5689/LIB.2018.2853550", doc.valueOf("//instance/alternateidentifier/text()")); - assertEquals(3, doc.selectNodes("//instance").size()); + assertEquals(2, doc.selectNodes("//instance").size()); assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()")); assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()")); diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index d0c8ea4ed..2c52ce269 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json 
@@ -453,6 +453,12 @@ "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource" }, + "refereed": { + "classid": "0001", + "classname": "peerReviewed", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, "license": { "dataInfo": { "deletedbyinference": false, @@ -585,6 +591,12 @@ "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource" }, + "refereed": { + "classid": "0000", + "classname": "UNKNOWN", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, "license": { "dataInfo": { "deletedbyinference": false, @@ -717,6 +729,12 @@ "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource" }, + "refereed": { + "classid": "0002", + "classname": "nonPeerReviewed", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, "license": { "dataInfo": { "deletedbyinference": false, @@ -849,6 +867,12 @@ "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource" }, + "refereed": { + "classid": "0001", + "classname": "peerReviewed", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, "license": { "dataInfo": { "deletedbyinference": false, @@ -866,7 +890,7 @@ "value": "" }, "url": [ - "http://dx.doi.org/10.1109/TED.2018.2853552", + "http://dx.doi.org/10.1109/TED.2018.2853551", "http://dx.doi.org/10.1109/TED.2018.2853554" ] }, diff --git a/pom.xml b/pom.xml index 86adee902..0f0aa711e 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.29] + [2.10.30] [4.0.3] [6.0.5] [3.1.6] From 7b8f85692e990abdc30e0f70e77f63bc95fbbe31 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 23 Mar 2022 17:20:23 +0100 Subject: [PATCH 123/162] [Enrichment country] fixed issues with parameters and workflow args --- .../input_countrypropagation_parameters.json | 6 ------ .../dhp/countrypropagation/oozie_app/workflow.xml | 9 ++++----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json index e7a6bf5b0..f217e2458 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json @@ -5,12 +5,6 @@ "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": false - }, { "paramName":"tn", "paramLongName":"resultTableName", diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml index 85116e4cc..202c86a6d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml @@ -110,7 +110,6 @@ --sourcePath${sourcePath} --whitelist${whitelist} --allowedtypes${allowedtypes} - --hive_metastore_uris${hive_metastore_uris} --outputPath${workingDir}/preparedInfo @@ -146,7 +145,7 @@
--sourcePath${sourcePath}/publication --outputPath${workingDir}/publication - --hive_metastore_uris${hive_metastore_uris} + --workingPath${workingDir}/workingP --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --preparedInfoPath${workingDir}/preparedInfo @@ -176,7 +175,7 @@
--sourcePath${sourcePath}/dataset --outputPath${workingDir}/dataset - --hive_metastore_uris${hive_metastore_uris} + --workingPath${workingDir}/workingD --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --preparedInfoPath${workingDir}/preparedInfo @@ -206,7 +205,7 @@
--sourcePath${sourcePath}/otherresearchproduct --outputPath${workingDir}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} + --workingPath${workingDir}/workingO --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --preparedInfoPath${workingDir}/preparedInfo @@ -236,7 +235,7 @@
--sourcePath${sourcePath}/software --outputPath${workingDir}/software - --hive_metastore_uris${hive_metastore_uris} + --workingPath${workingDir}/workingS --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --preparedInfoPath${workingDir}/preparedInfo From d8503cd19144f64d77b88b9e68c4242b978d017f Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 24 Mar 2022 14:02:36 +0200 Subject: [PATCH 124/162] added moooar organizations --- .../oozie_app/scripts/step20-createMonitorDB.sql | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index f34e3fabb..76674b227 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -22,7 +22,16 @@ create table TARGET.result stored as parquet as 'openorgs____::b84450f9864182c67b8611b5593f4250', 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', - 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2') )) foo; + 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', + 'openorgs____::d169c7407dd417152596908d48c11460', + 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', + 'openorgs____::2fb1e47b4612688d9de9169d579939a7', + 'openorgs____::759d59f05d77188faee99b7493b46805', + 'openorgs____::cad284878801b9465fa51a95b1d779db', + 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', + 'openorgs____::c0286313e36479eff8676dba9b724b40' + -- ,'openorgs____::c80a8243a5e5c620d7931c88d93bf17a' -- Paris Diderot + ) )) foo; compute stats TARGET.result; create table TARGET.result_citations stored as parquet as select * from 
SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); From 61319b2e8324d73620ed3618acd527333fbceef6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 25 Mar 2022 16:38:33 +0100 Subject: [PATCH 125/162] updated dhp-schema version; set entity-level dataInfo before & after merging the fields from the group of duplicates --- .../eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java | 2 +- .../eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java | 11 ++++++++++- pom.xml | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index b8ae4ca02..d345cf98f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -107,7 +107,7 @@ public class DedupRecordFactory { entity.setId(id); entity.setLastupdatetimestamp(ts); - entity.getDataInfo().setDeletedbyinference(false); + entity.setDataInfo(dataInfo); return entity; } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index e86f91f99..aa3722ce5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -77,7 +77,16 @@ class EntityMergerTest implements Serializable { // verify id assertEquals(dedupId, pub_merged.getId()); - assertEquals(pub_top.getJournal(), pub_merged.getJournal()); + assertEquals(pub_top.getJournal().getName(), pub_merged.getJournal().getName()); + assertEquals(pub_top.getJournal().getIssnOnline(), 
pub_merged.getJournal().getIssnOnline()); + assertEquals(pub_top.getJournal().getIssnLinking(), pub_merged.getJournal().getIssnLinking()); + assertEquals(pub_top.getJournal().getIssnPrinted(), pub_merged.getJournal().getIssnPrinted()); + assertEquals(pub_top.getJournal().getIss(), pub_merged.getJournal().getIss()); + assertEquals(pub_top.getJournal().getEp(), pub_merged.getJournal().getEp()); + assertEquals(pub_top.getJournal().getSp(), pub_merged.getJournal().getSp()); + assertEquals(pub_top.getJournal().getVol(), pub_merged.getJournal().getVol()); + assertEquals(pub_top.getJournal().getConferencedate(), pub_merged.getJournal().getConferencedate()); + assertEquals(pub_top.getJournal().getConferenceplace(), pub_merged.getJournal().getConferenceplace()); assertEquals("OPEN", pub_merged.getBestaccessright().getClassid()); assertEquals(pub_top.getResulttype(), pub_merged.getResulttype()); assertEquals(pub_top.getLanguage(), pub_merged.getLanguage()); diff --git a/pom.xml b/pom.xml index 86adee902..45265d5ba 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.29] + [2.10.31] [4.0.3] [6.0.5] [3.1.6] From 3610f1749ab7896cc72f131236a3455635200c65 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 28 Mar 2022 09:20:27 +0200 Subject: [PATCH 126/162] merged pom.xml from beta branch --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 7a20a1652..45265d5ba 100644 --- a/pom.xml +++ b/pom.xml @@ -797,11 +797,11 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.30-SNAPSHOT] + [2.10.31] [4.0.3] [6.0.5] [3.1.6] - [4.1.7] + [4.1.12] [2.6.1] 7.5.0 4.7.2 From 395ac6ececc1ac72947fb9efffd3e49e25708ba6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 28 Mar 2022 09:23:42 +0200 Subject: [PATCH 127/162] merged pom.xml from beta branch --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0f0aa711e..45265d5ba 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 
@@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.30] + [2.10.31] [4.0.3] [6.0.5] [3.1.6] From eca82e30c9b986be0a7049fbbaa477d91536c677 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 29 Mar 2022 09:46:49 +0200 Subject: [PATCH 128/162] updated dhp-schema version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 45265d5ba..6a14954e5 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.31] + [2.10.32] [4.0.3] [6.0.5] [3.1.6] From 0a0ae84c2232d8ed7fa08ee8f8e77349fd14a90a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 29 Mar 2022 10:52:58 +0200 Subject: [PATCH 129/162] [graph raw] DOI based instance URLs on https --- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 639c1ab30..1bbeac9fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -25,7 +25,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class OdfToOafMapper extends AbstractMdRecordToOafMapper { - public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/"; + public static final String HTTP_DX_DOI_PREIFX = "https://dx.doi.org/"; public OdfToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId, final boolean forceOrginalId) { From 1b1101016944ee921da8b49dbeff9106c05a6220 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 29 Mar 2022 10:59:14 +0200 Subject: [PATCH 130/162] minor fix --- .../test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index ca1dbc665..136b0fb93 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -70,6 +70,9 @@ class DataciteToOAFTest extends AbstractVocabularyTest { assertEquals(100, nativeSize) + spark.read.load(targetPath).printSchema(); + + val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf] result From ee24f3eb2c8fd92f43a49a3358d7a083365995b5 Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 29 Mar 2022 13:47:48 +0300 Subject: [PATCH 131/162] views cannot be stored as parquet... --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index b42640e10..eec037a54 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -15,6 +15,6 @@ CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources STORED AS PARQU SELECT organization AS id, id AS datasource FROM ${stats_db_name}.datasource_organizations; -CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects STORED AS PARQUET AS +CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS SELECT id AS project, organization as id FROM ${stats_db_name}.project_organizations; \ No newline at end of file From fff0b3cc199ae5fc5da302895d229fac776e3bfc Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 29 Mar 
2022 14:15:31 +0300 Subject: [PATCH 132/162] added apcs in monitor db --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 76674b227..bcc9f0b5d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -90,6 +90,11 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; +create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.result_apc; + + + create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; From 7112806a735013f438463c64847321d025be41e6 Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 29 Mar 2022 16:37:29 +0300 Subject: [PATCH 133/162] views cannot be stored as parquet... 
--- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index eec037a54..3da36dfe5 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -11,7 +11,7 @@ SELECT substr(o.id, 4) as id, FROM ${openaire_db_name}.organization o WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; -CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources STORED AS PARQUET AS +CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource FROM ${stats_db_name}.datasource_organizations; From 79336d46c5f7575619992e5d004d561166751c36 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 4 Apr 2022 15:52:31 +0200 Subject: [PATCH 134/162] [Clean Context] first naive implementation of a functionality to clean not wanted contextes from one result. 
This implementation simply verifies the main title of the results start with a given string --- .../oa/graph/clean/CleanContextSparkJob.java | 113 +++++++++++++ .../graph/input_clean_context_parameters.json | 37 ++++ .../dhp/oa/graph/clean/CleanContextTest.java | 159 ++++++++++++++++++ .../clean/publication_clean_context.json | 7 + 4 files changed, 316 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java new file mode 100644 index 000000000..b20dcb67b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java @@ -0,0 +1,113 @@ +package eu.dnetlib.dhp.oa.graph.clean; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import 
org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class CleanContextSparkJob implements Serializable { + private static final Logger log = LoggerFactory.getLogger(CleanContextSparkJob.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + CleanContextSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); + + String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); + + String contextId = parser.get("contextId"); + log.info("contextId: {}", contextId); + + String verifyParam = parser.get("verifyParam"); + log.info("verifyParam: {}", verifyParam); + + + String graphTableClassName = parser.get("graphTableClassName"); + log.info("graphTableClassName: {}", graphTableClassName); + + Class entityClazz = (Class) Class.forName(graphTableClassName); + + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + + cleanContext(spark, contextId, verifyParam, inputPath, entityClazz, workingPath); + }); + } + + private static void 
cleanContext(SparkSession spark, String contextId, String verifyParam, String inputPath, Class entityClazz, String workingPath) { + Dataset res = spark + .read() + .textFile(inputPath) + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), + Encoders.bean(entityClazz)); + + res.map((MapFunction) r -> { + if(!r.getTitle() + .stream() + .filter(t -> t.getQualifier().getClassid() + .equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())) + .anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()))){ + return r; + } + r.setContext(r.getContext().stream().filter(c -> !c.getId().split("::")[0] + .equalsIgnoreCase(contextId)).collect(Collectors.toList())); + return r; + } ,Encoders.bean(entityClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(workingPath); + + spark.read().textFile(workingPath).map((MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), + Encoders.bean(entityClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(inputPath); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json new file mode 100644 index 000000000..9b1588283 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json @@ -0,0 +1,37 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "in", + "paramLongName": "inputPath", + "paramDescription": "the path to the graph data dump to read", + "paramRequired": true + }, + { + "paramName": "wp", + "paramLongName": "workingPath", + "paramDescription": "the path to store the output graph", + "paramRequired": true + 
}, + { + "paramName": "ci", + "paramLongName": "contextId", + "paramDescription": "url to the ISLookup Service", + "paramRequired": true + }, + { + "paramName": "class", + "paramLongName": "graphTableClassName", + "paramDescription": "class name moelling the graph table", + "paramRequired": true + },{ + "paramName": "vf", + "paramLongName": "verifyParam", + "paramDescription": "url to the ISLookup Service", + "paramRequired": true +} +] diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java new file mode 100644 index 000000000..bba814346 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java @@ -0,0 +1,159 @@ +package eu.dnetlib.dhp.oa.graph.clean; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.dump.Constants; +import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; +import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.dump.oaf.Instance; +import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; +import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import 
org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Locale; + +public class CleanContextTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(CleanContextTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(DumpJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(DumpJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testResultClean() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json") + .getPath(); + final String prefix = "gcube "; + + + spark.read().textFile(sourcePath).map((MapFunction) r -> OBJECT_MAPPER.readValue(r, Publication.class), Encoders.bean(Publication.class)) + .write().json(workingDir.toString() + "/publication"); + + + CleanContextSparkJob.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", workingDir.toString() + "/publication", + "-graphTableClassName", 
Publication.class.getCanonicalName(), + "-workingPath", workingDir.toString() + "/working", + "-contextId","sobigdata", + "-verifyParam","gCube " + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(7, tmp.count()); + + //original result with sobigdata context and gcube as starting string in the main title for the publication + Assertions.assertEquals(0, + tmp.filter(p->p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")).collect().get(0).getContext().size()); + + //original result with sobigdata context without gcube as starting string in the main title for the publication + Assertions.assertEquals(1, + tmp.filter(p->p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).collect().get(0).getContext().size()); + Assertions.assertEquals("sobigdata::projects::2",tmp.filter(p->p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).collect().get(0).getContext().get(0).getId() ); + + //original result with sobigdata context with gcube as starting string in the subtitle + Assertions.assertEquals(1, + tmp.filter(p->p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getContext().size()); + Assertions.assertEquals("sobigdata::projects::2",tmp.filter(p->p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getContext().get(0).getId() ); + List titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getTitle(); + Assertions.assertEquals(1, titles.size()); + Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); + Assertions.assertEquals("subtitle", titles.get(0).getQualifier().getClassid()); + + //original result with sobigdata context with gcube not as starting string 
in the main title + Assertions.assertEquals(1, + tmp.filter(p->p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getContext().size()); + Assertions.assertEquals("sobigdata::projects::1",tmp.filter(p->p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getContext().get(0).getId() ); + titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getTitle(); + Assertions.assertEquals(1, titles.size()); + Assertions.assertFalse(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); + Assertions.assertTrue(titles.get(0).getValue().toLowerCase().contains(prefix.trim()) ); + Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid()); + + //original result with sobigdata in context and also other contexts with gcube as starting string for the main title + Assertions.assertEquals(1, + tmp.filter(p->p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getContext().size()); + Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getContext().get(0).getId() ); + titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getTitle(); + Assertions.assertEquals(1, titles.size()); + Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); + Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid()); + + //original result with multiple main titles, one of which with gcube as starting string, and with 2 contexts + Assertions.assertEquals(1, + tmp.filter(p->p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getContext().size()); + 
Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getContext().get(0).getId() ); + titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getTitle(); + Assertions.assertEquals(2, titles.size()); + Assertions.assertTrue(titles.stream().anyMatch(t -> t.getQualifier().getClassid().equals("main title") && t.getValue().toLowerCase().startsWith(prefix)) ); + + + //original result without sobigdata in context with gcube as starting string for the main title + Assertions.assertEquals(1, + tmp.filter(p->p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getContext().size()); + Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getContext().get(0).getId() ); + titles = tmp.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getTitle(); + Assertions.assertEquals(2, titles.size()); + + Assertions.assertTrue(titles.stream().anyMatch(t -> t.getQualifier().getClassid().equals("main title") && t.getValue().toLowerCase().startsWith(prefix))); + + + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json new file mode 100644 index 000000000..b7c51d810 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json @@ -0,0 +1,7 @@ +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T16:14:18.452Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg., bijl."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"8250"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282676557,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:51:24Z","harvestDate":"2020-05-25T11:33:13.427Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013110","metadataNamespace":""}},"originalId":["DansKnawCris::0224aae28af558f21768dbc6439c7a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:cro
sswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","class
name":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Gcube veldonderzoek d.m.v. 
boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra Archeologie Rapportenreeks P0502381"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:03:57.761Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount": {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1396"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283087415,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:58:39Z","harvestDate":"2020-05-25T11:34:38.707Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800020324","metadataNamespace":""}},"originalId":["DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceAc
tions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinferen
ce":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"van Someren, Christian","name":"Christian","pid":[],"rank":1,"surname":"van Someren"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - 
Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:32.27Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Over het Energieakkoord. In het energieakkoord voor duurzame groei is afgesproken dat in 2020 14 procent van de opwek hernieuwbaar moet zijn en in 2023 16 procent. 
De doelstelling is een uitdagende opgave waarbij de eerste vraag is: \"Hoeveel hernieuwbare energie wordt er op dit moment opgewekt in Nederland?\" Deze website geeft antwoord op de vraag voor de actueel opgewekte windenergie, zonne-energie en biogas."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["http://energieopwek.nl/"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282663379,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-08T17:46:55Z","harvestDate":"2020-05-25T15:30:24.079Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/hbo:oai:hbokennisbank.nl:hanzepure:oai:research.hanze.nl:publications/813a5dfa-4fd0-44c4-8cbf-310324dc724d","metadataNamespace":""}},"originalId":["DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:n
bn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0038","classname":"0038","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"energieproductie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Management"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Policy and 
Law"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Energie interventies en gedrag"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"publieke ondersteuning en communicatie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Professional practice & society"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"subtitle","classname":"subtitle","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"GCUBE opwek"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:40.568Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282670417,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:41:34Z","harvestDate":"2020-05-25T11:40:05.974Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550052278","metadataNamespace":""}},"originalId":["DansKnawCris::3c9f068ddc930360bec6925488a9a97f"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:
pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"cl
assid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None GCUBE"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T18:11:57.737Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceacti
on":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met lit. opg"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282758835,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:36:00Z","harvestDate":"2020-05-25T11:30:47.199Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800007467","metadataNamespace":""}},"originalId":["DansKnawCris::4669a378a73661417182c208e6fdab53"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid
_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Vlaardingen"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":""
,"classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gCube RAAP-rapport, ISSN 0925-6229 2089"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - 
Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T18:18:18.049Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met literatuuropgave"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283010899,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:48:12Z","harvestDate":"2020-05-25T11:32:46.363Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800014509","metadataNamespace":""}},"originalId":["DansKnawCris::4a9152e80f860eab99072e921d74a0ff"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid
_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","cla
ssname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proefsleuvenonderzoek aan de Bredasebaan 8 te Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube Archeodienst rapport, ISSN 1877-2900 565"}],"journal":null} +{"author":[{"affiliation":null,"fullname":"Schubart, A.","name":"A.","pid":[],"rank":1,"surname":"Schubart"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"dateofcollection":"2019-02-17T01:33:44Z","dateoftransformation":null,"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"n/a"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"d
ateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.pdf","http://dx.doi.org/10.1524/phil.1866.24.14.561"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":null,"value":"cc0"},"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2578.35"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["https://zenodo.org/record/1634826/files/article.pdf"]},{"accessright":null,"
collectedfrom":{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","https://academic.microsoft.com/#/detail/2601803436"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1866-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/1634826"]}],"language":null,"lastupdatetimestamp":1591290751234,"oaiprovenance":null,"originalId":["10.1524/phil.1866.24.14.561","2601803436","od______2659::f9ba286bcf8429160b58072028052325"],"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1524/phil.1866.24.14.561"}],"publisher":{"dataInfo":null,"value":"Walter de Gruyter GmbH"},"relevantdate":[{"dataInfo":null,"qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-03-26T10:09:50Z"},{"dataInfo":null,"qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1866-01-01"}],"resourcetype":{"classid":"Other","classname":"Other","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":null,"value":"Crossref"},{"dataInfo":null,"value":null}],"subject":[{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Theology"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"History"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Art 
history"}],"title":[{"dataInfo":null,"qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"XIV. Die wörter άγαλμα, είχών, ξόανον, ανδρuις und verwandte, in ihren verschiedenen beziehungen. Nach Pausanias"},{"dataInfo":null,"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube xiv die worter άγαλμα eίχών ξόανον ανδρuις und verwandte in ihren verschiedenen beziehungen nach pausanias"}],"journal":{"conferencedate":null,"conferenceplace":null,"dataInfo":null,"edition":null,"ep":null,"iss":null,"issnLinking":null,"issnOnline":"2196-7008","issnPrinted":"0031-7985","name":"Philologus","sp":null,"vol":"24"}} \ No newline at end of file From e77d1049511170f954f9122e54083dd491392387 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 5 Apr 2022 15:07:11 +0200 Subject: [PATCH 135/162] [OC] added / to workflow path --- .../dhp/actionmanager/opencitations/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index aee2559ee..0f01039f7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -83,7 +83,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --workingPath${workingPath}/COCI - --outputPath${workingPath}/COCI_JSON + --outputPath${workingPath}/COCI_JSON/ --delimiter${delimiter} --inputFile${inputFileCoci} From c442c91f8951d42f89d1270797b833d270f9217b Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 6 Apr 2022 
12:40:02 +0300 Subject: [PATCH 136/162] computing stats in each step --- .../scripts/step16-createIndicatorsTables.sql | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index a95ee74d8..24e6bff7e 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -12,6 +12,8 @@ and (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; +compute stats indi_pub_green_oa; + create table indi_pub_grey_lit stored as parquet as select distinct p.id, coalesce(grey_lit, 0) as grey_lit from publication p @@ -22,6 +24,8 @@ join result_classifications rt on rt.id = p.id where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; +compute stats indi_pub_grey_lit; + create table indi_pub_doi_from_crossref stored as parquet as select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref from publication p @@ -31,6 +35,7 @@ join datasource d on d.id = ri.collectedfrom where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; +compute stats indi_pub_doi_from_crossref; ---- Sprint 2 ---- create table indi_result_has_cc_licence stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as 
has_cc_license @@ -40,6 +45,8 @@ join result_licenses as license on license.id = r.id where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp on r.id= tmp.id; +compute stats indi_result_has_cc_licence; + create table indi_result_has_cc_licence_url stored as parquet as select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url from result r @@ -49,16 +56,21 @@ join result_licenses as license on license.id = r.id WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; +compute stats indi_result_has_cc_licence_url; + create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from publication; +compute stats indi_pub_has_abstract; + create table indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from result r left outer join (select id, 1 as has_orcid from result_orcid) tmp on r.id= tmp.id; +compute stats indi_result_with_orcid; ---- Sprint 3 ---- create table indi_funded_result_with_fundref stored as parquet as @@ -68,6 +80,8 @@ left outer join (select distinct id, 1 as fundref from project_results where provenance='Harvested') tmp on r.id= tmp.id; +compute stats indi_funded_result_with_fundref; + create table indi_result_org_country_collab stored as parquet as with tmp as (select o.id as id, o.country , ro.id as result,r.type from organization o @@ -79,6 +93,8 @@ join tmp as o2 on o1.result=o2.result where o1.id<>o2.id and o1.country<>o2.country group by o1.id, o1.type,o2.country; +compute stats indi_result_org_country_collab; + create table indi_result_org_collab stored as parquet as with tmp as (select o.id, ro.id as result,r.type from organization o @@ -90,6 +106,8 @@ join tmp as o2 on o1.result=o2.result where o1.id<>o2.id group by o1.id, o2.id, o1.type; +compute stats indi_result_org_collab; + create table 
indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from organization_projects op join organization o on o.id=op.id @@ -101,6 +119,8 @@ join tmp as f2 on f1.project=f2.project where f1.country<>f2.country group by f1.funder, f2.country, f1.country; +compute stats indi_funder_country_collab; + create table indi_result_country_collab stored as parquet as with tmp as (select country, ro.id as result,r.type from organization o @@ -112,6 +132,8 @@ join tmp as o2 on o1.result=o2.result where o1.country<>o2.country group by o1.country, o2.country, o1.type; +compute stats indi_result_country_collab; + ---- Sprint 4 ---- create table indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal @@ -123,6 +145,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp on pd.id=tmp.id; +compute stats indi_pub_diamond; + create table indi_pub_hybrid stored as parquet as select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid from publication_datasources pd @@ -133,6 +157,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp on pd.id=tmp.id; +compute stats indi_pub_hybrid; + create table indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative from publication pd @@ -143,6 +169,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli and ps.is_transformative_journal=true) tmp on pd.id=tmp.id; +compute stats indi_pub_in_transformative; + create table indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri left outer join @@ -153,11 +181,12 @@ where d.type like '%Journal%' and 
ri.accessright='Closed Access' and (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp on tmp.id=ri.id; - +compute stats indi_pub_closed_other_open; ---- Sprint 5 ---- create table indi_result_no_of_copies stored as parquet as select id, count(id) as number_of_copies from result_instance group by id; +compute stats indi_result_no_of_copies; ---- Sprint 6 ---- create table indi_pub_gold_oa stored as parquet as WITH gold_oa AS ( @@ -183,6 +212,8 @@ LEFT OUTER JOIN ( JOIN issn on issn.id=pd.datasource JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; +compute stats indi_pub_gold_oa; + create table indi_datasets_gold_oa stored as parquet as WITH gold_oa AS ( SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn @@ -210,6 +241,8 @@ LEFT OUTER JOIN ( JOIN issn on issn.id=pd.datasource JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; +compute stats indi_datasets_gold_oa; + create table indi_software_gold_oa stored as parquet as WITH gold_oa AS ( SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn @@ -237,6 +270,8 @@ LEFT OUTER JOIN ( JOIN issn on issn.id=pd.datasource JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; +compute stats indi_software_gold_oa; + create table indi_org_findable stored as parquet as with result_with_pid as ( select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro @@ -263,6 +298,8 @@ join result_with_pid_share on result_with_pid_share.organization=allresults.orga left outer join ( select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization; +compute stats indi_org_findable; + create table indi_org_openess stored as parquet as WITH datasets_oa as ( SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg @@ -313,6 +350,8 @@ left outer join ( left outer join ( select organization,s from allsoftwaresshare) tmp1 on 
tmp1.organization=allpubsshare.organization; +compute stats indi_org_openess; + create table indi_pub_hybrid_oa_with_cc stored as parquet as WITH hybrid_oa AS ( SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn @@ -343,6 +382,8 @@ LEFT OUTER JOIN ( JOIN indi_result_has_cc_licence cc on pd.id=cc.id where cc.has_cc_license=1) tmp on pd.id=tmp.id; +compute stats indi_pub_hybrid_oa_with_cc; + create table indi_pub_downloads stored as parquet as SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats join publication on result_id=id @@ -350,6 +391,8 @@ where downloads>0 GROUP BY result_id order by no_dowloads desc; +compute stats indi_pub_downloads; + create table indi_pub_downloads_datasource stored as parquet as SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats join publication on result_id=id @@ -357,15 +400,21 @@ where downloads>0 GROUP BY result_id, repository_id order by result_id; +compute stats indi_pub_downloads_datasource; + create table indi_pub_downloads_year stored as parquet as SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us join publication on result_id=id where downloads>0 GROUP BY result_id, `year` order by `year` asc; +compute stats indi_pub_downloads_year; + create table indi_pub_downloads_datasource_year stored as parquet as SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us join publication on result_id=id where downloads>0 GROUP BY result_id, repository_id, `year` order by `year` asc, result_id; + +compute stats indi_pub_downloads_datasource_year; \ No newline at end of file From 05fafa14082fd7dbedc70c19505026a9fea60d72 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 6 Apr 2022 15:23:50 +0200 Subject: [PATCH 137/162] [graph raw] avoid NPEs importing datasource consent 
fields --- .../oa/graph/raw/MigrateDbEntitiesApplication.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 1f8c23ac3..b5801ca5c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -42,10 +42,7 @@ import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; @@ -311,7 +308,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse")); ds.setFulltextdownload(rs.getBoolean("fulltextdownload")); - ds.setConsenttermsofusedate(rs.getDate("consenttermsofusedate").toString()); + ds + .setConsenttermsofusedate( + Optional + .ofNullable( + rs.getDate("consenttermsofusedate")) + .map(c -> c.toString()) + .orElse(null)); return Arrays.asList(ds); } catch (final Exception e) { From 4190c9f6bc2db772e0ea788c85a94c165d0f4cfa Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 6 Apr 2022 15:23:50 +0200 Subject: [PATCH 138/162] [graph raw] avoid NPEs importing datasource consent fields --- .../oa/graph/raw/MigrateDbEntitiesApplication.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 1f8c23ac3..b5801ca5c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -42,10 +42,7 @@ import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; @@ -311,7 +308,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse")); ds.setFulltextdownload(rs.getBoolean("fulltextdownload")); - ds.setConsenttermsofusedate(rs.getDate("consenttermsofusedate").toString()); + ds + .setConsenttermsofusedate( + Optional + .ofNullable( + rs.getDate("consenttermsofusedate")) + .map(c -> c.toString()) + .orElse(null)); return Arrays.asList(ds); } catch (final Exception e) { From 66e718981ec4001bc7ddee5eace375b14e472a90 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 11:34:02 +0200 Subject: [PATCH 139/162] [maven-release-plugin] prepare release dhp-1.2.4 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- 
dhp-workflows/dhp-broker-events/pom.xml | 6 ++---- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 22 files changed, 25 insertions(+), 27 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 012ff89a3..a4ab512a8 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4-SNAPSHOT + 1.2.4 dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 256017e2c..5efa96c43 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4-SNAPSHOT + 1.2.4 dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index db0097d64..05830a5c1 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.4-SNAPSHOT + 1.2.4 jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 97fbdf45b..a193dd893 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 7b18f0105..901e1b26f 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ 
-5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index f63f3aa01..559bc3125 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4-SNAPSHOT + 1.2.4 dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index c89cc9d1d..f1a7b08f0 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4-SNAPSHOT + 1.2.4 dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 047767d33..8f07db5be 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index afbd93dcb..3b8a09d16 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index f47f8ba6e..5cb9283ce 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index 8c10538c0..e4685d7ff 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f496ea9a2..f33b8c4c5 100644 --- 
a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 0b4269acd..122362196 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.4-SNAPSHOT + 1.2.4 compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 17146903a..fd60cafb2 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index e402d0600..518f8f565 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index c64c2f58e..ee52031e8 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 52f35ff07..3c1b2ec6e 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index a78f92d41..b39a7a9a0 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ 
-3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index 20d2f5b76..a9152062d 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index b1c51c497..a4dbade09 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 143178560..47bda3d00 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 6a14954e5..44bef5516 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 pom @@ -37,7 +37,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - HEAD + dhp-1.2.4 This module is the root descriptor for the dnet-hadoop project From 505420fd61e530dfbab5e689f3352280f08a1074 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 11:34:06 +0200 Subject: [PATCH 140/162] [maven-release-plugin] prepare for next development iteration --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml 
| 2 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 22 files changed, 24 insertions(+), 24 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index a4ab512a8..44165995d 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4 + 1.2.5-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 5efa96c43..7579bdf45 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4 + 1.2.5-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 05830a5c1..9d592943a 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.4 + 1.2.5-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index a193dd893..9040ea94e 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 901e1b26f..6df11f4ea 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 
1.2.5-SNAPSHOT ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 559bc3125..29e1fab1f 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4 + 1.2.5-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index f1a7b08f0..53d349d2a 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4 + 1.2.5-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 8f07db5be..479a9e8c6 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 3b8a09d16..5a41a0090 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index 5cb9283ce..a14076a78 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index e4685d7ff..c3d3a7375 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f33b8c4c5..37accbc4f 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ 
b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 122362196..591cad252 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.4 + 1.2.5-SNAPSHOT compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index fd60cafb2..687f0de66 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 518f8f565..ae8f3b53a 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index ee52031e8..ce3e739a5 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 3c1b2ec6e..2bc610c42 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index b39a7a9a0..954c8bd39 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows 
eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index a9152062d..54e18580b 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index a4dbade09..8c71a5ca1 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 47bda3d00..541d59007 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 44bef5516..be58842a1 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT pom @@ -37,7 +37,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - dhp-1.2.4 + HEAD This module is the root descriptor for the dnet-hadoop project From bc05b6168aa5cda8c4e014af385e01ed77e3c43d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 11:49:06 +0200 Subject: [PATCH 141/162] [maven-release-plugin] rollback the release of dhp-1.2.4 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 6 ++++-- 
dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 2 +- 22 files changed, 26 insertions(+), 24 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 44165995d..012ff89a3 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 7579bdf45..256017e2c 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 9d592943a..db0097d64 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 9040ea94e..97fbdf45b 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 6df11f4ea..7b18f0105 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ 
eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 29e1fab1f..f63f3aa01 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 53d349d2a..c89cc9d1d 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 479a9e8c6..047767d33 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 5a41a0090..afbd93dcb 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -1,9 +1,11 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index a14076a78..f47f8ba6e 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index c3d3a7375..8c10538c0 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 
37accbc4f..f496ea9a2 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 591cad252..0b4269acd 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 687f0de66..17146903a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index ae8f3b53a..e402d0600 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index ce3e739a5..c64c2f58e 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 2bc610c42..52f35ff07 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index 954c8bd39..a78f92d41 100644 --- 
a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index 54e18580b..20d2f5b76 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index 8c71a5ca1..b1c51c497 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 541d59007..143178560 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index be58842a1..6a14954e5 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT pom From 85bc722ff49ff86d379c1c6b07c4229fe7da8bb4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 12:18:43 +0200 Subject: [PATCH 142/162] [maven-release-plugin] prepare release dhp-1.2.4 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 6 ++---- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- 
dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 22 files changed, 25 insertions(+), 27 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 012ff89a3..a4ab512a8 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4-SNAPSHOT + 1.2.4 dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 256017e2c..5efa96c43 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4-SNAPSHOT + 1.2.4 dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index db0097d64..05830a5c1 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.4-SNAPSHOT + 1.2.4 jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 97fbdf45b..a193dd893 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 7b18f0105..901e1b26f 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml 
b/dhp-workflows/dhp-actionmanager/pom.xml index f63f3aa01..559bc3125 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4-SNAPSHOT + 1.2.4 dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index c89cc9d1d..f1a7b08f0 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4-SNAPSHOT + 1.2.4 dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 047767d33..8f07db5be 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index afbd93dcb..3b8a09d16 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index f47f8ba6e..5cb9283ce 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index 8c10538c0..e4685d7ff 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f496ea9a2..f33b8c4c5 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 
4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 0b4269acd..122362196 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.4-SNAPSHOT + 1.2.4 compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 17146903a..fd60cafb2 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index e402d0600..518f8f565 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index c64c2f58e..ee52031e8 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 52f35ff07..3c1b2ec6e 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index a78f92d41..b39a7a9a0 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-usage-raw-data-update diff --git 
a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index 20d2f5b76..a9152062d 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index b1c51c497..a4dbade09 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 143178560..47bda3d00 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 6a14954e5..44bef5516 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 pom @@ -37,7 +37,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - HEAD + dhp-1.2.4 This module is the root descriptor for the dnet-hadoop project From 2ac2d928bd7b450753cfff1130ff3180da76052a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 12:18:47 +0200 Subject: [PATCH 143/162] [maven-release-plugin] prepare for next development iteration --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 2 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- 
dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 22 files changed, 24 insertions(+), 24 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index a4ab512a8..44165995d 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4 + 1.2.5-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 5efa96c43..7579bdf45 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4 + 1.2.5-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 05830a5c1..9d592943a 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.4 + 1.2.5-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index a193dd893..9040ea94e 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 901e1b26f..6df11f4ea 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml 
b/dhp-workflows/dhp-actionmanager/pom.xml index 559bc3125..29e1fab1f 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4 + 1.2.5-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index f1a7b08f0..53d349d2a 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4 + 1.2.5-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 8f07db5be..479a9e8c6 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 3b8a09d16..5a41a0090 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index 5cb9283ce..a14076a78 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index e4685d7ff..c3d3a7375 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f33b8c4c5..37accbc4f 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 
diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 122362196..591cad252 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.4 + 1.2.5-SNAPSHOT compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index fd60cafb2..687f0de66 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 518f8f565..ae8f3b53a 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index ee52031e8..ce3e739a5 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 3c1b2ec6e..2bc610c42 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index b39a7a9a0..954c8bd39 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-usage-raw-data-update diff --git 
a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index a9152062d..54e18580b 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index a4dbade09..8c71a5ca1 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 47bda3d00..541d59007 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 44bef5516..be58842a1 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT pom @@ -37,7 +37,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - dhp-1.2.4 + HEAD This module is the root descriptor for the dnet-hadoop project From 3bba6d6e384074014b5d5a9d97ca5633ddd2a4a6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 12:23:17 +0200 Subject: [PATCH 144/162] [maven-release-plugin] rollback the release of dhp-1.2.4 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 6 ++++-- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- 
dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 2 +- 22 files changed, 26 insertions(+), 24 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 44165995d..012ff89a3 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 7579bdf45..256017e2c 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 9d592943a..db0097d64 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 9040ea94e..97fbdf45b 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 6df11f4ea..7b18f0105 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT ../pom.xml diff --git 
a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 29e1fab1f..f63f3aa01 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 53d349d2a..c89cc9d1d 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 479a9e8c6..047767d33 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 5a41a0090..afbd93dcb 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -1,9 +1,11 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index a14076a78..f47f8ba6e 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index c3d3a7375..8c10538c0 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 37accbc4f..f496ea9a2 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ 
b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 591cad252..0b4269acd 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 687f0de66..17146903a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index ae8f3b53a..e402d0600 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index ce3e739a5..c64c2f58e 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 2bc610c42..52f35ff07 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index 954c8bd39..a78f92d41 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ 
b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index 54e18580b..20d2f5b76 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index 8c71a5ca1..b1c51c497 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 541d59007..143178560 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index be58842a1..6a14954e5 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT pom From f03dea4f4993ac0b6c41d33bcfef2c4a0aa7f579 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 13:22:55 +0200 Subject: [PATCH 145/162] allow to skip maven site --- dhp-build/dhp-code-style/pom.xml | 4 ++++ pom.xml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index db0097d64..a7a67ff77 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -47,12 +47,16 @@ org.apache.maven.plugins maven-site-plugin 3.9.1 + + true + + UTF-8 sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop diff --git a/pom.xml b/pom.xml index 6a14954e5..010dd56fe 100644 --- a/pom.xml +++ b/pom.xml @@ -551,6 +551,9 @@ org.apache.maven.plugins maven-site-plugin 
3.9.1 + + ${dhp.site.skip} + @@ -791,6 +794,7 @@ 2.4.0.cloudera2 2.9.6 3.5 + true 11.0.2 2.11.12 5.6.1 From db3d9877a55eb97abecf4936737465210d3c21c2 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 13:26:58 +0200 Subject: [PATCH 146/162] [maven-release-plugin] prepare release dhp-1.2.4 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 6 ++---- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 22 files changed, 25 insertions(+), 27 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 012ff89a3..a4ab512a8 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4-SNAPSHOT + 1.2.4 dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 256017e2c..5efa96c43 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4-SNAPSHOT + 1.2.4 
dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index a7a67ff77..ab942d9ac 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.4-SNAPSHOT + 1.2.4 jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 97fbdf45b..a193dd893 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 7b18f0105..901e1b26f 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index f63f3aa01..559bc3125 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4-SNAPSHOT + 1.2.4 dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index c89cc9d1d..f1a7b08f0 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4-SNAPSHOT + 1.2.4 dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 047767d33..8f07db5be 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index afbd93dcb..3b8a09d16 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml 
b/dhp-workflows/dhp-dedup-openaire/pom.xml index f47f8ba6e..5cb9283ce 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index 8c10538c0..e4685d7ff 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f496ea9a2..f33b8c4c5 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 0b4269acd..122362196 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.4-SNAPSHOT + 1.2.4 compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 17146903a..fd60cafb2 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index e402d0600..518f8f565 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index c64c2f58e..ee52031e8 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml 
@@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 52f35ff07..3c1b2ec6e 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index a78f92d41..b39a7a9a0 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index 20d2f5b76..a9152062d 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index b1c51c497..a4dbade09 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 143178560..47bda3d00 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 010dd56fe..2a807e3d0 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 pom @@ -37,7 +37,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - HEAD + dhp-1.2.4 
This module is the root descriptor for the dnet-hadoop project From 598e11dfd77edfd56457b620810f7aff26c82bb0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 13:27:02 +0200 Subject: [PATCH 147/162] [maven-release-plugin] prepare for next development iteration --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 2 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 22 files changed, 24 insertions(+), 24 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index a4ab512a8..44165995d 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4 + 1.2.5-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 5efa96c43..7579bdf45 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4 + 1.2.5-SNAPSHOT 
dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index ab942d9ac..5a86efe17 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.4 + 1.2.5-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index a193dd893..9040ea94e 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 901e1b26f..6df11f4ea 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 559bc3125..29e1fab1f 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4 + 1.2.5-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index f1a7b08f0..53d349d2a 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4 + 1.2.5-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 8f07db5be..479a9e8c6 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 3b8a09d16..5a41a0090 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml 
index 5cb9283ce..a14076a78 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index e4685d7ff..c3d3a7375 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f33b8c4c5..37accbc4f 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 122362196..591cad252 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.4 + 1.2.5-SNAPSHOT compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index fd60cafb2..687f0de66 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 518f8f565..ae8f3b53a 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index ee52031e8..ce3e739a5 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp 
- 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 3c1b2ec6e..2bc610c42 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index b39a7a9a0..954c8bd39 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index a9152062d..54e18580b 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index a4dbade09..8c71a5ca1 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 47bda3d00..541d59007 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 2a807e3d0..136b9b867 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT pom @@ -37,7 +37,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - dhp-1.2.4 + HEAD This module is the root descriptor for the 
dnet-hadoop project From ad85d88eafbd14fc1a7ef7a53fe8c3e180c6b4af Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 13:28:35 +0200 Subject: [PATCH 148/162] [maven-release-plugin] rollback the release of dhp-1.2.4 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 6 ++++-- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 2 +- 22 files changed, 26 insertions(+), 24 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 44165995d..012ff89a3 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 7579bdf45..256017e2c 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build-properties-maven-plugin diff --git 
a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 5a86efe17..a7a67ff77 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 9040ea94e..97fbdf45b 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 6df11f4ea..7b18f0105 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 29e1fab1f..f63f3aa01 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 53d349d2a..c89cc9d1d 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 479a9e8c6..047767d33 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 5a41a0090..afbd93dcb 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -1,9 +1,11 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml 
b/dhp-workflows/dhp-dedup-openaire/pom.xml index a14076a78..f47f8ba6e 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index c3d3a7375..8c10538c0 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 37accbc4f..f496ea9a2 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 591cad252..0b4269acd 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 687f0de66..17146903a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index ae8f3b53a..e402d0600 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index ce3e739a5..c64c2f58e 100644 --- 
a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 2bc610c42..52f35ff07 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index 954c8bd39..a78f92d41 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index 54e18580b..20d2f5b76 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index 8c71a5ca1..b1c51c497 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 541d59007..143178560 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 136b9b867..010dd56fe 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.5-SNAPSHOT + 1.2.4-SNAPSHOT pom From 86585a6b274ad8e0beaa9e7f43d123b7320739be Mon Sep 17 
00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 13:32:19 +0200 Subject: [PATCH 149/162] [maven-release-plugin] prepare release dhp-1.2.4 --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 6 ++---- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 22 files changed, 25 insertions(+), 27 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index 012ff89a3..a4ab512a8 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4-SNAPSHOT + 1.2.4 dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 256017e2c..5efa96c43 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4-SNAPSHOT + 1.2.4 dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index a7a67ff77..ab942d9ac 100644 --- 
a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.4-SNAPSHOT + 1.2.4 jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index 97fbdf45b..a193dd893 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 7b18f0105..901e1b26f 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index f63f3aa01..559bc3125 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4-SNAPSHOT + 1.2.4 dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index c89cc9d1d..f1a7b08f0 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4-SNAPSHOT + 1.2.4 dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 047767d33..8f07db5be 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index afbd93dcb..3b8a09d16 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -1,11 +1,9 @@ - + dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index f47f8ba6e..5cb9283ce 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ 
dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index 8c10538c0..e4685d7ff 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f496ea9a2..f33b8c4c5 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 0b4269acd..122362196 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.4-SNAPSHOT + 1.2.4 compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 17146903a..fd60cafb2 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index e402d0600..518f8f565 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index c64c2f58e..ee52031e8 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml 
index 52f35ff07..3c1b2ec6e 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index a78f92d41..b39a7a9a0 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index 20d2f5b76..a9152062d 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index b1c51c497..a4dbade09 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4-SNAPSHOT + 1.2.4 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 143178560..47bda3d00 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 ../pom.xml diff --git a/pom.xml b/pom.xml index 010dd56fe..2a807e3d0 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.4-SNAPSHOT + 1.2.4 pom @@ -37,7 +37,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - HEAD + dhp-1.2.4 This module is the root descriptor for the dnet-hadoop project From c26222623fbd81dee46040acddab4fa9cd644296 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Apr 2022 13:32:22 
+0200 Subject: [PATCH 150/162] [maven-release-plugin] prepare for next development iteration --- dhp-build/dhp-build-assembly-resources/pom.xml | 2 +- dhp-build/dhp-build-properties-maven-plugin/pom.xml | 2 +- dhp-build/dhp-code-style/pom.xml | 2 +- dhp-build/pom.xml | 2 +- dhp-common/pom.xml | 2 +- dhp-workflows/dhp-actionmanager/pom.xml | 2 +- dhp-workflows/dhp-aggregation/pom.xml | 2 +- dhp-workflows/dhp-blacklist/pom.xml | 2 +- dhp-workflows/dhp-broker-events/pom.xml | 2 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 2 +- dhp-workflows/dhp-distcp/pom.xml | 2 +- dhp-workflows/dhp-doiboost/pom.xml | 2 +- dhp-workflows/dhp-enrichment/pom.xml | 4 ++-- dhp-workflows/dhp-graph-mapper/pom.xml | 2 +- dhp-workflows/dhp-graph-provision/pom.xml | 2 +- dhp-workflows/dhp-stats-promote/pom.xml | 2 +- dhp-workflows/dhp-stats-update/pom.xml | 2 +- dhp-workflows/dhp-usage-raw-data-update/pom.xml | 2 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 2 +- dhp-workflows/dhp-workflow-profiles/pom.xml | 2 +- dhp-workflows/pom.xml | 2 +- pom.xml | 4 ++-- 22 files changed, 24 insertions(+), 24 deletions(-) diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index a4ab512a8..44165995d 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4 + 1.2.5-SNAPSHOT dhp-build-assembly-resources diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index 5efa96c43..7579bdf45 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp-build - 1.2.4 + 1.2.5-SNAPSHOT dhp-build-properties-maven-plugin diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index ab942d9ac..5a86efe17 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ 
b/dhp-build/dhp-code-style/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-code-style - 1.2.4 + 1.2.5-SNAPSHOT jar diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index a193dd893..9040ea94e 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT dhp-build pom diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 901e1b26f..6df11f4ea 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT ../pom.xml diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 559bc3125..29e1fab1f 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4 + 1.2.5-SNAPSHOT dhp-actionmanager diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index f1a7b08f0..53d349d2a 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -4,7 +4,7 @@ eu.dnetlib.dhp dhp-workflows - 1.2.4 + 1.2.5-SNAPSHOT dhp-aggregation diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 8f07db5be..479a9e8c6 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 3b8a09d16..5a41a0090 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index 5cb9283ce..a14076a78 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 
1.2.5-SNAPSHOT 4.0.0 dhp-dedup-openaire diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml index e4685d7ff..c3d3a7375 100644 --- a/dhp-workflows/dhp-distcp/pom.xml +++ b/dhp-workflows/dhp-distcp/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f33b8c4c5..37accbc4f 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 122362196..591cad252 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 @@ -51,7 +51,7 @@ eu.dnetlib.dhp dhp-aggregation - 1.2.4 + 1.2.5-SNAPSHOT compile diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index fd60cafb2..687f0de66 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 518f8f565..ae8f3b53a 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml index ee52031e8..ce3e739a5 100644 --- a/dhp-workflows/dhp-stats-promote/pom.xml +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-stats-promote diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 3c1b2ec6e..2bc610c42 100644 --- 
a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-stats-update diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index b39a7a9a0..954c8bd39 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-usage-raw-data-update diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index a9152062d..54e18580b 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 dhp-usage-stats-build diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index a4dbade09..8c71a5ca1 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -3,7 +3,7 @@ dhp-workflows eu.dnetlib.dhp - 1.2.4 + 1.2.5-SNAPSHOT 4.0.0 diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 47bda3d00..541d59007 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 2a807e3d0..136b9b867 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.dnetlib.dhp dhp - 1.2.4 + 1.2.5-SNAPSHOT pom @@ -37,7 +37,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git https://code-repo.d4science.org/D-Net/dnet-hadoop/ - dhp-1.2.4 + HEAD This module is the root descriptor for the dnet-hadoop project From 48b580b45cc2e36ca08d73e2b111ffed019dfce1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 11 Apr 2022 08:52:12 +0200 Subject: [PATCH 151/162] [graph 
enrichment] fixed country_propagation oozie workflow definition, parameter saveGraph is not needed anymore by the SparkCountryPropagationJob --- .../eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml index 202c86a6d..271ccbf72 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml @@ -274,7 +274,6 @@ --sourcePath${sourcePath}/publication --preparedInfoPath${workingDir}/publication - --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication @@ -304,7 +303,6 @@ --sourcePath${sourcePath}/dataset --preparedInfoPath${workingDir}/dataset - --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset @@ -334,7 +332,6 @@ --sourcePath${sourcePath}/otherresearchproduct --preparedInfoPath${workingDir}/otherresearchproduct - --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct @@ -364,7 +361,6 @@ --sourcePath${sourcePath}/software --preparedInfoPath${workingDir}/software - --saveGraph${saveGraph} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software From 73c172926a03447119b2fa4dccdd9765e736b2a5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 Apr 2022 10:25:42 +0200 Subject: [PATCH 152/162] [Doiboost] fixed fundingReference extraction from the Crossref records --- .../doiboost/crossref/Crossref2Oaf.scala | 39 +++++++------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git 
a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 0cb08ea94..b4d6d67e2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -446,16 +446,12 @@ case object Crossref2Oaf { case "10.13039/501100000781" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100000001" => - generateSimpleRelationFromAward(funder, "nsf_________", a => a) - case "10.13039/501100001665" => - generateSimpleRelationFromAward(funder, "anr_________", a => a) - case "10.13039/501100002341" => - generateSimpleRelationFromAward(funder, "aka_________", a => a) + case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a) + case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a) + case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a) case "10.13039/501100001602" => - generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) - case "10.13039/501100000923" => - generateSimpleRelationFromAward(funder, "arc_________", a => a) + generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", "")) + case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a) case "10.13039/501100000038" => val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) @@ -468,14 +464,10 @@ case object Crossref2Oaf { val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, 
ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) - case "10.13039/501100002848" => - generateSimpleRelationFromAward(funder, "conicytf____", a => a) - case "10.13039/501100003448" => - generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) - case "10.13039/501100010198" => - generateSimpleRelationFromAward(funder, "sgov________", a => a) - case "10.13039/501100004564" => - generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) + case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a) + case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) + case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a) + case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) case "10.13039/501100003407" => generateSimpleRelationFromAward(funder, "miur________", a => a) val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63") @@ -487,15 +479,11 @@ case object Crossref2Oaf { "irb_hr______", a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", "") ) - case "10.13039/501100006769" => - generateSimpleRelationFromAward(funder, "rsf_________", a => a) - case "10.13039/501100001711" => - generateSimpleRelationFromAward(funder, "snsf________", snsfRule) - case "10.13039/501100004410" => - generateSimpleRelationFromAward(funder, "tubitakf____", a => a) - case "10.10.13039/100004440" => - generateSimpleRelationFromAward(funder, "wt__________", a => a) + case "10.13039/501100006769" => generateSimpleRelationFromAward(funder, "rsf_________", a => a) + case "10.13039/501100001711" => generateSimpleRelationFromAward(funder, "snsf________", snsfRule) + case "10.13039/501100004410" => generateSimpleRelationFromAward(funder, "tubitakf____", a => a) case "10.13039/100004440" => + generateSimpleRelationFromAward(funder, 
"wt__________", a => a) val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) @@ -516,6 +504,7 @@ case object Crossref2Oaf { case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) case "Wellcome Trust Masters Fellowship" => + generateSimpleRelationFromAward(funder, "wt__________", a => a) val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) From 869407c6e202e765ba26d15b684ad47c516eff12 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Apr 2022 14:02:05 +0200 Subject: [PATCH 153/162] [Measures] added new measure (usagecounts) as action set. Measure added at the level of the result. 
Ref #7587 --- .../dnetlib/dhp/actionmanager/Constants.java | 2 + .../usagestats/SparkAtomicActionUsageJob.java | 165 ++++++++++++++++++ .../usagestats/UsageStatsModel.java | 34 ++++ .../usagestats/input_actionset_parameter.json | 26 +++ .../usagestats/oozie_app/config-default.xml | 30 ++++ .../usagestats/oozie_app/workflow.xml | 98 +++++++++++ .../SparkAtomicActionCountJobTest.java | 133 ++++++++++++++ .../dhp/actionmanager/usagestats/usagestatsdb | 12 ++ pom.xml | 2 +- 9 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsModel.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index b790d90cb..aa25ca633 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -27,6 +27,8 @@ public class Constants { public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE"; public static final 
String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; public static final String UPDATE_SUBJECT_SDG_CLASS_ID = "subject:sdg"; + public static final String UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID = "measure:usage_counts"; + public static final String UPDATE_KEY_USAGE_COUNTS = "count"; public static final String FOS_CLASS_ID = "FOS"; public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java new file mode 100644 index 000000000..04f861df5 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -0,0 +1,165 @@ + +package eu.dnetlib.dhp.actionmanager.usagestats; + +import static eu.dnetlib.dhp.actionmanager.Constants.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.cxf.wsdl.service.factory.MethodNameSoapActionServiceConfiguration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import 
org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize; +import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Measure; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import lombok.val; +import scala.Tuple2; + +/** + * created the Atomic Action for each tipe of results + */ +public class SparkAtomicActionUsageJob implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionUsageJob.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkAtomicActionUsageJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); + + final String dbname = parser.get("statsdb"); + + final String workingPath = parser.get("workingPath"); + + runWithSparkHiveSession( + conf, + 
isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + prepareResults(dbname, spark, outputPath); + prepareActionSet(spark, workingPath, outputPath); + }); + } + + public static void prepareResults(String db, SparkSession spark, String workingPath) { + spark + .sql( + "Select result_id, downloads, views " + + "from " + db + ".usage_stats") + .as(Encoders.bean(UsageStatsModel.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath); + } + + public static void prepareActionSet(SparkSession spark, String inputPath, String outputPath){ + readPath(spark, inputPath, UsageStatsModel.class) + .groupByKey((MapFunction) us -> us.getResult_id(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + UsageStatsModel first = it.next(); + it.forEachRemaining(us -> { + first.setDownloads(first.getDownloads() + us.getDownloads()); + first.setViews(first.getViews() + us.getViews()); + }); + + Result res = new Result(); + res.setId("50|" + k); + + + res.setMeasures(getMeasure(first.getDownloads(), first.getViews())); + return res; + }, Encoders.bean(Result.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } + + private static List getMeasure(Long downloads, Long views) { + DataInfo dataInfo = OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, + OafMapperUtils + .qualifier( + UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + ""); + + return Arrays + .asList( + Measure + .newInstance("downloads", String.valueOf(downloads), UPDATE_KEY_USAGE_COUNTS, dataInfo), + Measure.newInstance("views", String.valueOf(views), UPDATE_KEY_USAGE_COUNTS, dataInfo)); + + } + + private static void removeOutputDir(SparkSession spark, String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + + public static Dataset 
readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsModel.java new file mode 100644 index 000000000..df8a77eb6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsModel.java @@ -0,0 +1,34 @@ + +package eu.dnetlib.dhp.actionmanager.usagestats; + +import java.io.Serializable; + +public class UsageStatsModel implements Serializable { + private String result_id; + private Long downloads; + private Long views; + + public String getResult_id() { + return result_id; + } + + public void setResult_id(String result_id) { + this.result_id = result_id; + } + + public Long getDownloads() { + return downloads; + } + + public void setDownloads(Long downloads) { + this.downloads = downloads; + } + + public Long getViews() { + return views; + } + + public void setViews(Long views) { + this.views = views; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json new file mode 100644 index 000000000..b8ae24e12 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "hmu", + "paramLongName": "hive_metastore_uris", + "paramDescription": "the URI for the hive 
metastore", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + }, + { + "paramName": "sdb", + "paramLongName": "statsdb", + "paramDescription": "the name of the db to be used", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/config-default.xml new file mode 100644 index 000000000..d262cb6e0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml new file mode 100644 index 000000000..781efb475 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml @@ -0,0 +1,98 @@ + + + + outputPath + the path where to store the actionset + + + statsdb + the path where to store the actionset + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + 
com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + Produces the atomic action with the usage stats count for results + eu.dnetlib.dhp.actionmanager.usagestats.SparkAtomicActionUsageJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --hive_metastore_uris${hiveMetastoreUris} + --outputPath${outputPath} + --statsdb${statsdb} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java new file mode 100644 index 000000000..33d76893d --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java @@ -0,0 +1,133 @@ + +package eu.dnetlib.dhp.actionmanager.usagestats; + +import static 
org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Collectors; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class SparkAtomicActionCountJobTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(SparkAtomicActionCountJobTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(SparkAtomicActionCountJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(SparkAtomicActionCountJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + 
.appName(SparkAtomicActionCountJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testMatch() { + String usageScoresPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb") + .getPath(); + + SparkAtomicActionUsageJob.prepareActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet"); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/actionSet") + .map(usm -> OBJECT_MAPPER.readValue(usm, Result.class)); + + Assertions.assertEquals(9, tmp.count()); + + tmp.foreach(r -> Assertions.assertEquals(2, r.getMeasures().size())); + tmp.foreach(r -> r.getMeasures().stream().forEach(m -> + m.getUnit().stream().forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference())))); + tmp.foreach(r -> r.getMeasures().stream().forEach(m -> + m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred())))); + tmp.foreach(r -> r.getMeasures().stream().forEach(m -> + m.getUnit().stream().forEach(u -> Assertions.assertFalse(u.getDataInfo().getInvisible())))); + + tmp.foreach(r -> r.getMeasures().stream().forEach(m -> + m.getUnit().stream().forEach(u -> Assertions.assertEquals("measure:usage_counts", + u.getDataInfo().getProvenanceaction().getClassid())))); + tmp.foreach(r -> r.getMeasures().stream().forEach(m -> + m.getUnit().stream().forEach(u -> Assertions.assertEquals("Inferred by OpenAIRE", + u.getDataInfo().getProvenanceaction().getClassname())))); + + tmp.foreach(r -> r.getMeasures().stream().forEach(m -> + m.getUnit().stream().forEach(u -> Assertions.assertEquals("count", + u.getKey())))); + + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count()); + + Assertions.assertEquals("0", 
tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).collect().get(0) + .getMeasures().stream().filter(m -> m.getId().equals("downloads")).collect(Collectors.toList()).get(0) + .getUnit().get(0).getValue()); + Assertions.assertEquals("5", tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).collect().get(0) + .getMeasures().stream().filter(m -> m.getId().equals("views")).collect(Collectors.toList()).get(0) + .getUnit().get(0).getValue()); + + Assertions.assertEquals("0", tmp.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")).collect().get(0) + .getMeasures().stream().filter(m -> m.getId().equals("downloads")).collect(Collectors.toList()).get(0) + .getUnit().get(0).getValue()); + Assertions.assertEquals("1", tmp.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")).collect().get(0) + .getMeasures().stream().filter(m -> m.getId().equals("views")).collect(Collectors.toList()).get(0) + .getUnit().get(0).getValue()); + + Assertions.assertEquals("2", tmp.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")).collect().get(0) + .getMeasures().stream().filter(m -> m.getId().equals("downloads")).collect(Collectors.toList()).get(0) + .getUnit().get(0).getValue()); + Assertions.assertEquals("6", tmp.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")).collect().get(0) + .getMeasures().stream().filter(m -> m.getId().equals("views")).collect(Collectors.toList()).get(0) + .getUnit().get(0).getValue()); + } + + + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb new file mode 100644 index 000000000..fee74f697 --- /dev/null +++ 
b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb @@ -0,0 +1,12 @@ +{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} +{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"result_id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} +{"result_id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} +{"result_id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} +{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} +{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"result_id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} +{"result_id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} +{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} +{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"result_id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 136b9b867..b0957393a 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.32] + [2.11.34-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 5feae77937df688e43f8c17988424dcf04259722 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Apr 2022 15:13:09 +0200 Subject: [PATCH 154/162] [Measures] last changes to accomodate tests --- .../usagestats/SparkAtomicActionUsageJob.java | 23 +- .../usagestats/input_actionset_parameter.json | 6 + .../usagestats/oozie_app/workflow.xml | 1 + .../SparkAtomicActionCountJobTest.java | 200 ++++++++++++++---- 4 files changed, 181 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 04f861df5..012261c39 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -94,19 +94,19 @@ public class SparkAtomicActionUsageJob implements Serializable { public static void prepareResults(String db, SparkSession spark, String workingPath) { spark - .sql( - "Select result_id, downloads, views " + - "from " + db + ".usage_stats") - .as(Encoders.bean(UsageStatsModel.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(workingPath); + .sql( + "Select result_id, downloads, views " + + "from " + db + ".usage_stats") + .as(Encoders.bean(UsageStatsModel.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath); } - public static void prepareActionSet(SparkSession spark, String inputPath, String outputPath){ - readPath(spark, inputPath, UsageStatsModel.class) - .groupByKey((MapFunction) us -> us.getResult_id(), Encoders.STRING()) + public static void prepareActionSet(SparkSession spark, String inputPath, String outputPath) { + readPath(spark, inputPath, UsageStatsModel.class) + .groupByKey((MapFunction) us -> us.getResult_id(), Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { UsageStatsModel first = it.next(); it.forEachRemaining(us -> { @@ -117,7 +117,6 @@ public class SparkAtomicActionUsageJob implements Serializable { Result res = new Result(); res.setId("50|" + k); - res.setMeasures(getMeasure(first.getDownloads(), first.getViews())); return res; }, Encoders.bean(Result.class)) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json index b8ae24e12..821905da4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json @@ -22,5 +22,11 @@ "paramLongName": "statsdb", "paramDescription": "the name of the db to be used", "paramRequired": true + }, + { + "paramName": "wp", + "paramLongName": "workingPath", + "paramDescription": "the workingPath where to save the content of the usage_stats table", + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml index 781efb475..5b552ca4b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml @@ -89,6 +89,7 @@ --hive_metastore_uris${hiveMetastoreUris} --outputPath${outputPath} --statsdb${statsdb} + --workingPath${workingDir}/usageDb diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java index 33d76893d..7cc9eb326 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java @@ -75,7 +75,7 @@ public class SparkAtomicActionCountJobTest { 
.getResource("/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb") .getPath(); - SparkAtomicActionUsageJob.prepareActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet"); + SparkAtomicActionUsageJob.prepareActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet"); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); @@ -86,48 +86,174 @@ public class SparkAtomicActionCountJobTest { Assertions.assertEquals(9, tmp.count()); tmp.foreach(r -> Assertions.assertEquals(2, r.getMeasures().size())); - tmp.foreach(r -> r.getMeasures().stream().forEach(m -> - m.getUnit().stream().forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference())))); - tmp.foreach(r -> r.getMeasures().stream().forEach(m -> - m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred())))); - tmp.foreach(r -> r.getMeasures().stream().forEach(m -> - m.getUnit().stream().forEach(u -> Assertions.assertFalse(u.getDataInfo().getInvisible())))); + tmp + .foreach( + r -> r + .getMeasures() + .stream() + .forEach( + m -> m + .getUnit() + .stream() + .forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference())))); + tmp + .foreach( + r -> r + .getMeasures() + .stream() + .forEach( + m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred())))); + tmp + .foreach( + r -> r + .getMeasures() + .stream() + .forEach( + m -> m + .getUnit() + .stream() + .forEach(u -> Assertions.assertFalse(u.getDataInfo().getInvisible())))); - tmp.foreach(r -> r.getMeasures().stream().forEach(m -> - m.getUnit().stream().forEach(u -> Assertions.assertEquals("measure:usage_counts", - u.getDataInfo().getProvenanceaction().getClassid())))); - tmp.foreach(r -> r.getMeasures().stream().forEach(m -> - m.getUnit().stream().forEach(u -> Assertions.assertEquals("Inferred by OpenAIRE", - u.getDataInfo().getProvenanceaction().getClassname())))); + tmp + .foreach( + r -> r + .getMeasures() + .stream() + 
.forEach( + m -> m + .getUnit() + .stream() + .forEach( + u -> Assertions + .assertEquals( + "measure:usage_counts", + u.getDataInfo().getProvenanceaction().getClassid())))); + tmp + .foreach( + r -> r + .getMeasures() + .stream() + .forEach( + m -> m + .getUnit() + .stream() + .forEach( + u -> Assertions + .assertEquals( + "Inferred by OpenAIRE", + u.getDataInfo().getProvenanceaction().getClassname())))); - tmp.foreach(r -> r.getMeasures().stream().forEach(m -> - m.getUnit().stream().forEach(u -> Assertions.assertEquals("count", - u.getKey())))); + tmp + .foreach( + r -> r + .getMeasures() + .stream() + .forEach( + m -> m + .getUnit() + .stream() + .forEach( + u -> Assertions + .assertEquals( + "count", + u.getKey())))); - Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count()); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count()); - Assertions.assertEquals("0", tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).collect().get(0) - .getMeasures().stream().filter(m -> m.getId().equals("downloads")).collect(Collectors.toList()).get(0) - .getUnit().get(0).getValue()); - Assertions.assertEquals("5", tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).collect().get(0) - .getMeasures().stream().filter(m -> m.getId().equals("views")).collect(Collectors.toList()).get(0) - .getUnit().get(0).getValue()); + Assertions + .assertEquals( + "0", + tmp + .filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "5", + tmp + .filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")) + .collect() + .get(0) + 
.getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); - Assertions.assertEquals("0", tmp.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")).collect().get(0) - .getMeasures().stream().filter(m -> m.getId().equals("downloads")).collect(Collectors.toList()).get(0) - .getUnit().get(0).getValue()); - Assertions.assertEquals("1", tmp.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")).collect().get(0) - .getMeasures().stream().filter(m -> m.getId().equals("views")).collect(Collectors.toList()).get(0) - .getUnit().get(0).getValue()); + Assertions + .assertEquals( + "0", + tmp + .filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "1", + tmp + .filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); - Assertions.assertEquals("2", tmp.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")).collect().get(0) - .getMeasures().stream().filter(m -> m.getId().equals("downloads")).collect(Collectors.toList()).get(0) - .getUnit().get(0).getValue()); - Assertions.assertEquals("6", tmp.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")).collect().get(0) - .getMeasures().stream().filter(m -> m.getId().equals("views")).collect(Collectors.toList()).get(0) - .getUnit().get(0).getValue()); + Assertions + .assertEquals( + "2", + tmp + .filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")) + .collect() + .get(0) + 
.getMeasures() + .stream() + .filter(m -> m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "6", + tmp + .filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); } - - } From dbfbe8841a8ec3335ed479111cc96b48f2a6b30b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Apr 2022 15:41:03 +0200 Subject: [PATCH 155/162] [Clean Context] changed the description in input parameters --- .../dnetlib/dhp/oa/graph/input_clean_context_parameters.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json index 9b1588283..e3d31d69f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json @@ -20,7 +20,7 @@ { "paramName": "ci", "paramLongName": "contextId", - "paramDescription": "url to the ISLookup Service", + "paramDescription": "the id of the context to be removed", "paramRequired": true }, { @@ -31,7 +31,7 @@ },{ "paramName": "vf", "paramLongName": "verifyParam", - "paramDescription": "url to the ISLookup Service", + "paramDescription": "the parameter to be verified to remove the context", "paramRequired": true } ] From 5295effc9664fea8f92e148019d36469091484fd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Apr 2022 16:20:40 +0200 Subject: [PATCH 156/162] [Measures] fixed issue --- .../dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 012261c39..261794598 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -87,7 +87,7 @@ public class SparkAtomicActionUsageJob implements Serializable { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareResults(dbname, spark, outputPath); + prepareResults(dbname, spark, workingPath); prepareActionSet(spark, workingPath, outputPath); }); } From c304657d9195b334c44bce4a11649a68c60a3299 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 21 Apr 2022 11:27:26 +0200 Subject: [PATCH 157/162] [Measures] put the logic in common, no need to change the schema --- .../dhp/schema/oaf/utils/OafMapperUtils.java | 15 +++++++++++++ .../usagestats/SparkAtomicActionUsageJob.java | 22 ++++--------------- pom.xml | 2 +- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 0a51e8600..8d67ce804 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -391,4 +391,19 @@ public class OafMapperUtils { } return null; } + + public static KeyValue newKeyValueInstance (String key, String value, DataInfo dataInfo){ + KeyValue kv = new KeyValue(); + kv.setDataInfo(dataInfo); + kv.setKey(key); + kv.setValue(value); + return kv; + } + + public static Measure newMeasureInstance(String 
id, String value, String key, DataInfo dataInfo) { + Measure m = new Measure(); + m.setId(id); + m.setUnit(Arrays.asList(newKeyValueInstance(key, value, dataInfo))); + return m; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 261794598..790170ccc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -3,23 +3,14 @@ package eu.dnetlib.dhp.actionmanager.usagestats; import static eu.dnetlib.dhp.actionmanager.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Optional; -import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.cxf.wsdl.service.factory.MethodNameSoapActionServiceConfiguration; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; @@ -31,19 +22,15 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.bipmodel.BipDeserialize; -import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore; + import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; -import lombok.val; -import scala.Tuple2; + /** * created the Atomic Action for each tipe of results @@ -143,9 +130,8 @@ public class SparkAtomicActionUsageJob implements Serializable { return Arrays .asList( - Measure - .newInstance("downloads", String.valueOf(downloads), UPDATE_KEY_USAGE_COUNTS, dataInfo), - Measure.newInstance("views", String.valueOf(views), UPDATE_KEY_USAGE_COUNTS, dataInfo)); + OafMapperUtils.newMeasureInstance("downloads", String.valueOf(downloads), UPDATE_KEY_USAGE_COUNTS, dataInfo), + OafMapperUtils.newMeasureInstance("views", String.valueOf(views), UPDATE_KEY_USAGE_COUNTS, dataInfo)); } diff --git a/pom.xml b/pom.xml index b0957393a..136b9b867 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.11.34-SNAPSHOT] + [2.10.32] [4.0.3] [6.0.5] [3.1.6] From b61efd613b5ba5e324f2a1e4a40f549baf52b4dc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 21 Apr 2022 12:09:37 +0200 Subject: [PATCH 158/162] [Measures] addressed comments in the PR --- .../eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java | 2 +- .../usagestats/SparkAtomicActionUsageJob.java | 9 ++++----- .../usagestats/input_actionset_parameter.json | 2 +- .../dhp/actionmanager/usagestats/oozie_app/workflow.xml | 6 +++--- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 8d67ce804..9ee359cd5 100644 --- 
a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -392,7 +392,7 @@ public class OafMapperUtils { return null; } - public static KeyValue newKeyValueInstance (String key, String value, DataInfo dataInfo){ + public static KeyValue newKeyValueInstance(String key, String value, DataInfo dataInfo) { KeyValue kv = new KeyValue(); kv.setDataInfo(dataInfo); kv.setKey(key); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 790170ccc..04533aa53 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -22,7 +22,6 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -31,7 +30,6 @@ import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; - /** * created the Atomic Action for each tipe of results */ @@ -65,7 +63,7 @@ public class SparkAtomicActionUsageJob implements Serializable { SparkConf conf = new SparkConf(); conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - final String dbname = parser.get("statsdb"); + final String dbname = parser.get("usagestatsdb"); final String workingPath = parser.get("workingPath"); @@ -130,8 +128,9 @@ public class SparkAtomicActionUsageJob implements Serializable { return Arrays .asList( - OafMapperUtils.newMeasureInstance("downloads", 
String.valueOf(downloads), UPDATE_KEY_USAGE_COUNTS, dataInfo), - OafMapperUtils.newMeasureInstance("views", String.valueOf(views), UPDATE_KEY_USAGE_COUNTS, dataInfo)); + OafMapperUtils + .newMeasureInstance("downloads", String.valueOf(downloads), UPDATE_KEY_USAGE_COUNTS, dataInfo), + OafMapperUtils.newMeasureInstance("views", String.valueOf(views), UPDATE_KEY_USAGE_COUNTS, dataInfo)); } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json index 821905da4..e9200d3ad 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json @@ -19,7 +19,7 @@ }, { "paramName": "sdb", - "paramLongName": "statsdb", + "paramLongName": "usagestatsdb", "paramDescription": "the name of the db to be used", "paramRequired": true }, diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml index 5b552ca4b..d94cf7d53 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml @@ -5,8 +5,8 @@ the path where to store the actionset - statsdb - the path where to store the actionset + usagestatsdb + the name of the db to be used sparkDriverMemory @@ -88,7 +88,7 @@ --hive_metastore_uris${hiveMetastoreUris} --outputPath${outputPath} - --statsdb${statsdb} + --usagestatsdb${usagestatsdb} --workingPath${workingDir}/usageDb From 
20de75ca64cb0237dcc6a19caa0fc1308d60f7d5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 21 Apr 2022 12:14:03 +0200 Subject: [PATCH 159/162] [Measures] removed typo --- .../dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 04533aa53..c284ad8bd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -31,7 +31,7 @@ import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; /** - * created the Atomic Action for each tipe of results + * created the Atomic Action for each type of results */ public class SparkAtomicActionUsageJob implements Serializable { From 29150a5d0c69a540b546ffcfbc91114e68c40301 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 21 Apr 2022 13:31:56 +0200 Subject: [PATCH 160/162] code formatting --- .../oa/graph/clean/CleanContextSparkJob.java | 181 +++++---- .../dhp/oa/graph/clean/CleanContextTest.java | 375 ++++++++++++------ 2 files changed, 357 insertions(+), 199 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java index b20dcb67b..3e9b17f3f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextSparkJob.java @@ -1,16 +1,13 @@ + package eu.dnetlib.dhp.oa.graph.clean; 
-import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Context; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -21,93 +18,113 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Serializable; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class CleanContextSparkJob implements Serializable { - private static final Logger log = LoggerFactory.getLogger(CleanContextSparkJob.class); + private static final Logger log = LoggerFactory.getLogger(CleanContextSparkJob.class); - private static 
final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - CleanContextSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); + String jsonConfiguration = IOUtils + .toString( + CleanContextSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/input_clean_context_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputPath = parser.get("inputPath"); - log.info("inputPath: {}", inputPath); + String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); - String workingPath = parser.get("workingPath"); - log.info("workingPath: {}", workingPath); + String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); - String contextId = parser.get("contextId"); - log.info("contextId: {}", contextId); + String contextId = parser.get("contextId"); + log.info("contextId: {}", contextId); - String verifyParam = parser.get("verifyParam"); - log.info("verifyParam: {}", verifyParam); + String verifyParam = parser.get("verifyParam"); + log.info("verifyParam: {}", verifyParam); + String graphTableClassName 
= parser.get("graphTableClassName"); + log.info("graphTableClassName: {}", graphTableClassName); - String graphTableClassName = parser.get("graphTableClassName"); - log.info("graphTableClassName: {}", graphTableClassName); + Class entityClazz = (Class) Class.forName(graphTableClassName); - Class entityClazz = (Class) Class.forName(graphTableClassName); + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + cleanContext(spark, contextId, verifyParam, inputPath, entityClazz, workingPath); + }); + } - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { + private static void cleanContext(SparkSession spark, String contextId, String verifyParam, + String inputPath, Class entityClazz, String workingPath) { + Dataset res = spark + .read() + .textFile(inputPath) + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), + Encoders.bean(entityClazz)); - cleanContext(spark, contextId, verifyParam, inputPath, entityClazz, workingPath); - }); - } + res.map((MapFunction) r -> { + if (!r + .getTitle() + .stream() + .filter( + t -> t + .getQualifier() + .getClassid() + .equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())) + .anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()))) { + return r; + } + r + .setContext( + r + .getContext() + .stream() + .filter( + c -> !c.getId().split("::")[0] + .equalsIgnoreCase(contextId)) + .collect(Collectors.toList())); + return r; + }, Encoders.bean(entityClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath); - private static void cleanContext(SparkSession spark, String contextId, String verifyParam, String inputPath, Class entityClazz, String workingPath) { - Dataset res = spark - .read() - .textFile(inputPath) - .map( - (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), - Encoders.bean(entityClazz)); - - 
res.map((MapFunction) r -> { - if(!r.getTitle() - .stream() - .filter(t -> t.getQualifier().getClassid() - .equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())) - .anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()))){ - return r; - } - r.setContext(r.getContext().stream().filter(c -> !c.getId().split("::")[0] - .equalsIgnoreCase(contextId)).collect(Collectors.toList())); - return r; - } ,Encoders.bean(entityClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(workingPath); - - spark.read().textFile(workingPath).map((MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), - Encoders.bean(entityClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(inputPath); - } + spark + .read() + .textFile(workingPath) + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), + Encoders.bean(entityClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java index bba814346..472d3781d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanContextTest.java @@ -1,18 +1,12 @@ + package eu.dnetlib.dhp.oa.graph.clean; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; -import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.dump.oaf.Instance; -import 
eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; -import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Locale; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -27,133 +21,280 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.Locale; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.oa.graph.dump.Constants; +import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; +import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.dump.oaf.Instance; +import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; +import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class CleanContextTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - private static final Logger log = LoggerFactory.getLogger(CleanContextTest.class); + private static final Logger log = LoggerFactory.getLogger(CleanContextTest.class); - @BeforeAll - public static void beforeAll() throws IOException { - 
workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); - SparkConf conf = new SparkConf(); - conf.setAppName(DumpJobTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(DumpJobTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - spark = SparkSession - .builder() - .appName(DumpJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + spark = SparkSession + .builder() + .appName(DumpJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } - @Test - public void testResultClean() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json") - .getPath(); - final String prefix = "gcube "; + @Test + public void testResultClean() throws Exception { + final String sourcePath = getClass() + 
.getResource("/eu/dnetlib/dhp/oa/graph/clean/publication_clean_context.json") + .getPath(); + final String prefix = "gcube "; + spark + .read() + .textFile(sourcePath) + .map( + (MapFunction) r -> OBJECT_MAPPER.readValue(r, Publication.class), + Encoders.bean(Publication.class)) + .write() + .json(workingDir.toString() + "/publication"); - spark.read().textFile(sourcePath).map((MapFunction) r -> OBJECT_MAPPER.readValue(r, Publication.class), Encoders.bean(Publication.class)) - .write().json(workingDir.toString() + "/publication"); + CleanContextSparkJob.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", workingDir.toString() + "/publication", + "-graphTableClassName", Publication.class.getCanonicalName(), + "-workingPath", workingDir.toString() + "/working", + "-contextId", "sobigdata", + "-verifyParam", "gCube " + }); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); - CleanContextSparkJob.main(new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--inputPath", workingDir.toString() + "/publication", - "-graphTableClassName", Publication.class.getCanonicalName(), - "-workingPath", workingDir.toString() + "/working", - "-contextId","sobigdata", - "-verifyParam","gCube " - }); + Assertions.assertEquals(7, tmp.count()); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/publication") - .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + // original result with sobigdata context and gcube as starting string in the main title for the publication + Assertions + .assertEquals( + 0, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) + .collect() + .get(0) + .getContext() + .size()); - 
Assertions.assertEquals(7, tmp.count()); + // original result with sobigdata context without gcube as starting string in the main title for the publication + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")) + .collect() + .get(0) + .getContext() + .size()); + Assertions + .assertEquals( + "sobigdata::projects::2", + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")) + .collect() + .get(0) + .getContext() + .get(0) + .getId()); - //original result with sobigdata context and gcube as starting string in the main title for the publication - Assertions.assertEquals(0, - tmp.filter(p->p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")).collect().get(0).getContext().size()); + // original result with sobigdata context with gcube as starting string in the subtitle + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")) + .collect() + .get(0) + .getContext() + .size()); + Assertions + .assertEquals( + "sobigdata::projects::2", + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")) + .collect() + .get(0) + .getContext() + .get(0) + .getId()); + List titles = tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")) + .collect() + .get(0) + .getTitle(); + Assertions.assertEquals(1, titles.size()); + Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix)); + Assertions.assertEquals("subtitle", titles.get(0).getQualifier().getClassid()); - //original result with sobigdata context without gcube as starting string in the main title for the publication - Assertions.assertEquals(1, - tmp.filter(p->p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).collect().get(0).getContext().size()); - 
Assertions.assertEquals("sobigdata::projects::2",tmp.filter(p->p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).collect().get(0).getContext().get(0).getId() ); + // original result with sobigdata context with gcube not as starting string in the main title + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")) + .collect() + .get(0) + .getContext() + .size()); + Assertions + .assertEquals( + "sobigdata::projects::1", + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")) + .collect() + .get(0) + .getContext() + .get(0) + .getId()); + titles = tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")) + .collect() + .get(0) + .getTitle(); + Assertions.assertEquals(1, titles.size()); + Assertions.assertFalse(titles.get(0).getValue().toLowerCase().startsWith(prefix)); + Assertions.assertTrue(titles.get(0).getValue().toLowerCase().contains(prefix.trim())); + Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid()); - //original result with sobigdata context with gcube as starting string in the subtitle - Assertions.assertEquals(1, - tmp.filter(p->p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getContext().size()); - Assertions.assertEquals("sobigdata::projects::2",tmp.filter(p->p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getContext().get(0).getId() ); - List titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getTitle(); - Assertions.assertEquals(1, titles.size()); - Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); - Assertions.assertEquals("subtitle", titles.get(0).getQualifier().getClassid()); + // original result with sobigdata in context and also other contexts with gcube as starting string for the main + 
// title + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")) + .collect() + .get(0) + .getContext() + .size()); + Assertions + .assertEquals( + "dh-ch", + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")) + .collect() + .get(0) + .getContext() + .get(0) + .getId()); + titles = tmp + .filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")) + .collect() + .get(0) + .getTitle(); + Assertions.assertEquals(1, titles.size()); + Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix)); + Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid()); - //original result with sobigdata context with gcube not as starting string in the main title - Assertions.assertEquals(1, - tmp.filter(p->p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getContext().size()); - Assertions.assertEquals("sobigdata::projects::1",tmp.filter(p->p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getContext().get(0).getId() ); - titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getTitle(); - Assertions.assertEquals(1, titles.size()); - Assertions.assertFalse(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); - Assertions.assertTrue(titles.get(0).getValue().toLowerCase().contains(prefix.trim()) ); - Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid()); + // original result with multiple main title one of which whith gcube as starting string and with 2 contextes + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")) + .collect() + .get(0) + .getContext() + .size()); + Assertions + .assertEquals( + "dh-ch", + tmp + .filter(p -> 
p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")) + .collect() + .get(0) + .getContext() + .get(0) + .getId()); + titles = tmp + .filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")) + .collect() + .get(0) + .getTitle(); + Assertions.assertEquals(2, titles.size()); + Assertions + .assertTrue( + titles + .stream() + .anyMatch( + t -> t.getQualifier().getClassid().equals("main title") + && t.getValue().toLowerCase().startsWith(prefix))); - //original result with sobigdata in context and also other contexts with gcube as starting string for the main title - Assertions.assertEquals(1, - tmp.filter(p->p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getContext().size()); - Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getContext().get(0).getId() ); - titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getTitle(); - Assertions.assertEquals(1, titles.size()); - Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); - Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid()); + // original result without sobigdata in context with gcube as starting string for the main title + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")) + .collect() + .get(0) + .getContext() + .size()); + Assertions + .assertEquals( + "dh-ch", + tmp + .filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")) + .collect() + .get(0) + .getContext() + .get(0) + .getId()); + titles = tmp + .filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")) + .collect() + .get(0) + .getTitle(); + Assertions.assertEquals(2, titles.size()); - //original result with multiple main title one of which whith gcube 
as starting string and with 2 contextes - Assertions.assertEquals(1, - tmp.filter(p->p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getContext().size()); - Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getContext().get(0).getId() ); - titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getTitle(); - Assertions.assertEquals(2, titles.size()); - Assertions.assertTrue(titles.stream().anyMatch(t -> t.getQualifier().getClassid().equals("main title") && t.getValue().toLowerCase().startsWith(prefix)) ); + Assertions + .assertTrue( + titles + .stream() + .anyMatch( + t -> t.getQualifier().getClassid().equals("main title") + && t.getValue().toLowerCase().startsWith(prefix))); - - //original result without sobigdata in context with gcube as starting string for the main title - Assertions.assertEquals(1, - tmp.filter(p->p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getContext().size()); - Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getContext().get(0).getId() ); - titles = tmp.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getTitle(); - Assertions.assertEquals(2, titles.size()); - - Assertions.assertTrue(titles.stream().anyMatch(t -> t.getQualifier().getClassid().equals("main title") && t.getValue().toLowerCase().startsWith(prefix))); - - - } + } } From aa12429f500e563a114b39650b3d8c13c361d3fa Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 22 Apr 2022 11:05:08 +0200 Subject: [PATCH 161/162] Modified last intersection since we lost many titles. 
--- .../doiboost/SparkGenerateDoiBoost.scala | 83 ++++++++----------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 9323c994c..10a5be969 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -59,52 +59,6 @@ object SparkGenerateDoiBoost { val workingDirPath = parser.get("workingPath") val openaireOrganizationPath = parser.get("openaireOrganizationPath") - val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { - override def zero: Publication = new Publication - - override def reduce(b: Publication, a: (String, Publication)): Publication = { - - if (b == null) { - if (a != null && a._2 != null) { - a._2.setId(a._1) - return a._2 - } - } else { - if (a != null && a._2 != null) { - b.mergeFrom(a._2) - b.setId(a._1) - val authors = AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor) - b.setAuthor(authors) - return b - } - } - new Publication - } - - override def merge(b1: Publication, b2: Publication): Publication = { - if (b1 == null) { - if (b2 != null) - return b2 - } else { - if (b2 != null) { - b1.mergeFrom(b2) - val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) - b1.setAuthor(authors) - if (b2.getId != null && b2.getId.nonEmpty) - b1.setId(b2.getId) - return b1 - } - } - new Publication - } - - override def finish(reduction: Publication): Publication = reduction - - override def bufferEncoder: Encoder[Publication] = Encoders.kryo[Publication] - - override def outputEncoder: Encoder[Publication] = Encoders.kryo[Publication] - } - implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] implicit val mapEncoderOrg: 
Encoder[Organization] = Encoders.kryo[Organization] implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset] @@ -175,12 +129,43 @@ object SparkGenerateDoiBoost { .map(DoiBoostMappingUtil.fixPublication) .map(p => (p.getId, p)) .groupByKey(_._1) - .agg(crossrefAggregator.toColumn) - .map(p => p._2) + .reduceGroups((left, right) => + { + //Check left is not null + if (left != null && left._1 != null) + { + //If right is null then return left + if (right == null || right._2 == null) + left + else { + // Here Left and Right are not null + // So we have to merge + val b1 = left._2 + val b2 = right._2 + b1.mergeFrom(b2) + b1.mergeOAFDataInfo(b2) + val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) + b1.setAuthor(authors) + if (b2.getId != null && b2.getId.nonEmpty) + b1.setId(b2.getId) + //Return publication Merged + (b1.getId, b1) + } + } + else { + // Left is Null so we return right + right + } + } + + ) + .filter(s => s!= null && s._2!=null) + .map(s => s._2._2) .write .mode(SaveMode.Overwrite) .save(s"$workingDirPath/doiBoostPublicationFiltered") + val affiliationPath = parser.get("affiliationPath") val paperAffiliationPath = parser.get("paperAffiliationPath") @@ -305,4 +290,4 @@ object SparkGenerateDoiBoost { .save(s"$workingDirPath/doiBoostOrganization") } -} +} \ No newline at end of file From a82ec3aaaf60ea77918f3ff1cfab20538ef6fe7e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 22 Apr 2022 11:08:13 +0200 Subject: [PATCH 162/162] code formatter --- .../doiboost/SparkGenerateDoiBoost.scala | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 10a5be969..2cbd53097 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ 
b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -129,11 +129,9 @@ object SparkGenerateDoiBoost { .map(DoiBoostMappingUtil.fixPublication) .map(p => (p.getId, p)) .groupByKey(_._1) - .reduceGroups((left, right) => - { + .reduceGroups((left, right) => { //Check left is not null - if (left != null && left._1 != null) - { + if (left != null && left._1 != null) { //If right is null then return left if (right == null || right._2 == null) left @@ -151,21 +149,17 @@ object SparkGenerateDoiBoost { //Return publication Merged (b1.getId, b1) } - } - else { + } else { // Left is Null so we return right right } - } - - ) - .filter(s => s!= null && s._2!=null) + }) + .filter(s => s != null && s._2 != null) .map(s => s._2._2) .write .mode(SaveMode.Overwrite) .save(s"$workingDirPath/doiBoostPublicationFiltered") - val affiliationPath = parser.get("affiliationPath") val paperAffiliationPath = parser.get("paperAffiliationPath") @@ -290,4 +284,4 @@ object SparkGenerateDoiBoost { .save(s"$workingDirPath/doiBoostOrganization") } -} \ No newline at end of file +}