From 2a52a42169ecdede8024fc9fcbadb1008e49f912 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Tue, 6 Dec 2022 10:10:21 +0200 Subject: [PATCH 01/18] Added 4 institutions: -University of Modena and Reggio Emilia -Bilkent University -Saints Cyril and Methodius University of Skopje -University of Milan --- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 2bdcbfa3d..98dca7129 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -50,7 +50,11 @@ create table TARGET.result stored as parquet as 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona - 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb' -- McMaster University + 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University + 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia + 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University + 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje + 'openorgs____::db7686f30f22cbe73a4fde872ce812a6' -- University of Milan ) )) foo; compute stats TARGET.result; From 6449ff42073979226d1c02c08ee608173657bbef Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 22 Dec 2022 10:18:21 +0200 Subject: [PATCH 02/18] 1. 
Added a decision node to enables the workflow to make a selection on the execution path to follow 2. Added new organization 3. Added 5 new tables from Eurostast --- .../oa/graph/stats/oozie_app/scripts/step15_5.sql | 5 +++++ .../oozie_app/scripts/step20-createMonitorDB.sql | 8 +++++++- .../dhp/oa/graph/stats/oozie_app/workflow.xml | 13 ++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql index 86ead4a2c..584de0a56 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql @@ -30,6 +30,11 @@ from rcount group by rcount.pid; create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture; +create view ${stats_db_name}.rndgdpexpenditure as select * from stats_ext.rndgdpexpenditure; +create view ${stats_db_name}.doctoratestudents as select * from stats_ext.doctoratestudents; +create view ${stats_db_name}.totalresearchers as select * from stats_ext.totalresearchers; +create view ${stats_db_name}.totalresearchersft as select * from stats_ext.totalresearchersft; +create view ${stats_db_name}.hrrst as select * from stats_ext.hrrst; create table ${stats_db_name}.result_instance stored as parquet as select distinct r.* diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 98dca7129..3e69ff58d 100644 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -10,6 +10,11 @@ create view if not exists TARGET.creation_date as select * from SOURCE.creation_ create view if not exists TARGET.funder as select * from SOURCE.funder; create view if not exists TARGET.fundref as select * from SOURCE.fundref; create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture; +create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure; +create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents; +create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; +create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; +create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; create table TARGET.result stored as parquet as select distinct * from ( @@ -54,7 +59,8 @@ create table TARGET.result stored as parquet as 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje - 'openorgs____::db7686f30f22cbe73a4fde872ce812a6' -- University of Milan + 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan + 'openorgs____::b8b8ca674452579f3f593d9f5e557483' -- University College Cork ) )) foo; compute stats TARGET.result; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 08d33f4e8..17dcd1fdd 100644 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -70,7 +70,18 @@ - + + + + + ${wf:conf('resumeFrom') eq 'Step1'} + ${wf:conf('resumeFrom') eq 'step20-createMonitorDB'} + ${wf:conf('resumeFrom') eq 'step21-createObservatoryDB-pre'} + ${wf:conf('resumeFrom') eq 'step21-createObservatoryDB'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 592013d5ddf1bac85dee76bb84931b4a31ad36b0 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 23 Dec 2022 09:43:16 +0200 Subject: [PATCH 03/18] Added more steps in decision node --- .../dhp/oa/graph/stats/oozie_app/workflow.xml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 17dcd1fdd..c68ae46ca 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -75,9 +75,31 @@ ${wf:conf('resumeFrom') eq 'Step1'} + ${wf:conf('resumeFrom') eq 'Step2'} + ${wf:conf('resumeFrom') eq 'Step3'} + ${wf:conf('resumeFrom') eq 'Step4'} + ${wf:conf('resumeFrom') eq 'Step5'} + ${wf:conf('resumeFrom') eq 'Step6'} + ${wf:conf('resumeFrom') eq 'Step7'} + ${wf:conf('resumeFrom') eq 'Step8'} + ${wf:conf('resumeFrom') eq 'Step9'} + ${wf:conf('resumeFrom') eq 'Step10'} + ${wf:conf('resumeFrom') eq 'Step11'} + ${wf:conf('resumeFrom') eq 'Step12'} + ${wf:conf('resumeFrom') eq 'Step13'} + ${wf:conf('resumeFrom') eq 'Step14'} + ${wf:conf('resumeFrom') eq 'Step15'} + ${wf:conf('resumeFrom') eq 'Step15_5'} + ${wf:conf('resumeFrom') eq 'Contexts'} + 
${wf:conf('resumeFrom') eq 'Step16-createIndicatorsTables'} + ${wf:conf('resumeFrom') eq 'Step16_1-definitions'} + ${wf:conf('resumeFrom') eq 'Step16_5'} + ${wf:conf('resumeFrom') eq 'Step19-finalize'} ${wf:conf('resumeFrom') eq 'step20-createMonitorDB'} ${wf:conf('resumeFrom') eq 'step21-createObservatoryDB-pre'} ${wf:conf('resumeFrom') eq 'step21-createObservatoryDB'} + ${wf:conf('resumeFrom') eq 'step21-createObservatoryDB-post'} + ${wf:conf('resumeFrom') eq 'Step22'} From becb242c1797b3ba40e5e92b4dd263248d59b14d Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Wed, 4 Jan 2023 16:50:29 +0200 Subject: [PATCH 04/18] Monitor DB only Workflow --- dhp-workflows/dhp-monitor-update/pom.xml | 32 +++ .../monitor/oozie_app/config-default.xml | 34 +++ .../graph/monitor/oozie_app/monitor-post.sh | 21 ++ .../dhp/oa/graph/monitor/oozie_app/monitor.sh | 24 ++ .../oozie_app/scripts/createMonitorDB.sql | 241 ++++++++++++++++++ .../oa/graph/monitor/oozie_app/updateCache.sh | 4 + .../oa/graph/monitor/oozie_app/workflow.xml | 105 ++++++++ 7 files changed, 461 insertions(+) create mode 100644 dhp-workflows/dhp-monitor-update/pom.xml create mode 100644 dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh create mode 100644 dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh create mode 100644 dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql create mode 100644 dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/updateCache.sh create mode 100644 dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-monitor-update/pom.xml 
b/dhp-workflows/dhp-monitor-update/pom.xml new file mode 100644 index 000000000..ca0bb9837 --- /dev/null +++ b/dhp-workflows/dhp-monitor-update/pom.xml @@ -0,0 +1,32 @@ + + + + dhp-workflows + eu.dnetlib.dhp + 1.2.4-SNAPSHOT + + 4.0.0 + dhp-monitor-update + + + org.apache.spark + spark-core_2.11 + + + org.apache.spark + spark-sql_2.11 + + + + + + pl.project13.maven + git-commit-id-plugin + 2.1.11 + + false + + + + + diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/config-default.xml b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/config-default.xml new file mode 100644 index 000000000..63fc84d75 --- /dev/null +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/config-default.xml @@ -0,0 +1,34 @@ + + + jobTracker + ${jobTracker} + + + nameNode + ${nameNode} + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hive_jdbc_url + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228 + + + oozie.wf.workflow.notification.url + {serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status + + + stats_tool_api_url + ${stats_tool_api_url} + + \ No newline at end of file diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh new file mode 100644 index 000000000..b8c71681a --- /dev/null +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh @@ -0,0 +1,21 @@ +export 
PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! [ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +export SOURCE=$1 +export TARGET=$2 +export SHADOW=$3 + +impala-shell -q "invalidate metadata;" +impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - +echo "Impala shell finished" + +echo "Updating shadow monitor database" +impala-shell -q "create database if not exists ${SHADOW}" +impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f - +impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f - +echo "Shadow db ready!" \ No newline at end of file diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh new file mode 100644 index 000000000..f39bf4893 --- /dev/null +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh @@ -0,0 +1,24 @@ +export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! 
[ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +export SOURCE=$1 +export TARGET=$2 +export SHADOW=$3 +export SCRIPT_PATH=$4 + +export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" +export HADOOP_USER_NAME="oozie" + +echo "Getting file from " $SCRIPT_PATH +hdfs dfs -copyToLocal $SCRIPT_PATH + +echo "Creating monitor database" +#cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo +cat createMonitorDB.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g" > foo +hive $HIVE_OPTS -f foo +echo "Hive shell finished" \ No newline at end of file diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql new file mode 100644 index 000000000..e9e460cb0 --- /dev/null +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql @@ -0,0 +1,241 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create view if not exists TARGET.category as select * from SOURCE.category; +create view if not exists TARGET.concept as select * from SOURCE.concept; +create view if not exists TARGET.context as select * from SOURCE.context; +create view if not exists TARGET.country as select * from SOURCE.country; +create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp; +create view if not exists TARGET.creation_date as select * from SOURCE.creation_date; +create view if not exists 
TARGET.funder as select * from SOURCE.funder; +create view if not exists TARGET.fundref as select * from SOURCE.fundref; +create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture; +create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure; +create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents; +create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; +create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; +create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) + union all + select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) + union all + select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( + 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" + 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council + 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? 
+ 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University + 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade + 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki + 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho + 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid + 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen + 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens + -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot + 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University + 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark + 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin + 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt + 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven + 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape + 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute + 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University + 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg + 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) + 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr + 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw + 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly + 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete + 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus + 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras + 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle 
University of Thessaloniki + 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank + 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech + 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University + 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona + 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University + 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia + 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University + 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje + 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan + 'openorgs____::b8b8ca674452579f3f593d9f5e557483' -- University College Cork + ) )) foo; + +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; + +create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; + +create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS; + +create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_citations_oc COMPUTE STATISTICS; + +create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS; + +create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result 
r where r.id=orig.id); +ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; + +create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; + +create table TARGET.result_datasources stored as parquet as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS; + +create table TARGET.result_fundercount stored as parquet as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; + +create table TARGET.result_gold stored as parquet as select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; + +create table TARGET.result_greenoa stored as parquet as select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; + +create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; + +create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; + +create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized; +ANALYZE TABLE TARGET.licenses_normalized COMPUTE STATISTICS; + +create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE 
TARGET.result_oids COMPUTE STATISTICS; + +create table TARGET.result_organization stored as parquet as select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; + +create table TARGET.result_peerreviewed stored as parquet as select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; + +create table TARGET.result_pids stored as parquet as select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; + +create table TARGET.result_projectcount stored as parquet as select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; + +create table TARGET.result_projects stored as parquet as select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; + +create table TARGET.result_refereed stored as parquet as select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; + +create table TARGET.result_sources stored as parquet as select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS; + +create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; + +create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where 
r.id=orig.id); +ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; + +create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); +create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); +create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +drop view TARGET.foo1; +drop view TARGET.foo2; +ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; + +-- datasources +create view if not exists TARGET.datasource as select * from SOURCE.datasource; +create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids; +create view if not exists TARGET.datasource_organizations as select * from SOURCE.datasource_organizations; +create view if not exists TARGET.datasource_sources as select * from SOURCE.datasource_sources; + +create table TARGET.datasource_results stored as parquet as select id as result, datasource as id from TARGET.result_datasources; +ANALYZE TABLE TARGET.datasource_results COMPUTE STATISTICS; + +-- organizations +create view if not exists TARGET.organization as select * from SOURCE.organization; +create view if not exists TARGET.organization_datasources as select * from SOURCE.organization_datasources; +create view if not exists TARGET.organization_pids as select * from SOURCE.organization_pids; +create view if not exists TARGET.organization_projects as select * from SOURCE.organization_projects; +create view if not exists TARGET.organization_sources as select * from SOURCE.organization_sources; + +-- projects +create view if not exists TARGET.project as select * from SOURCE.project; +create view if not exists TARGET.project_oids as select * from SOURCE.project_oids; +create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations; +create view if not exists TARGET.project_resultcount as 
select * from SOURCE.project_resultcount; +create view if not exists TARGET.project_classification as select * from SOURCE.project_classification; + +create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; +ANALYZE TABLE TARGET.project_results COMPUTE STATISTICS; + +-- indicators +-- Sprint 1 ---- +create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; +create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; +create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; +-- Sprint 2 ---- +create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; +create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; +create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; +create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 
from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; +---- Sprint 3 ---- +create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; +create view TARGET.indi_result_org_collab as select * from SOURCE.indi_result_org_collab; +create view TARGET.indi_result_org_country_collab as select * from SOURCE.indi_result_org_country_collab; +create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org; +create view TARGET.indi_project_collab_org_country as select * from SOURCE.indi_project_collab_org_country; +create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab; +create view TARGET.indi_result_country_collab as select * from SOURCE.indi_result_country_collab; +---- Sprint 4 ---- +create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; +create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; +create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; +---- Sprint 5 ---- +create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE 
TARGET.indi_result_no_of_copies COMPUTE STATISTICS; +---- Sprint 6 ---- +create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; +create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; +create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; +create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; +create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; +---- Sprint 7 ---- +create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; +create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; +create view TARGET.indi_org_fairness as select * from SOURCE.indi_org_fairness; +create view TARGET.indi_org_fairness_pub_pr as select * from 
SOURCE.indi_org_fairness_pub_pr; +create view TARGET.indi_org_fairness_pub_year as select * from SOURCE.indi_org_fairness_pub_year; +create view TARGET.indi_org_fairness_pub as select * from SOURCE.indi_org_fairness_pub; +create view TARGET.indi_org_fairness_year as select * from SOURCE.indi_org_fairness_year; +create view TARGET.indi_org_findable_year as select * from SOURCE.indi_org_findable_year; +create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable; +create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; +create view TARGET.indi_org_openess_year as select * from SOURCE.indi_org_openess_year; +create table TARGET.indi_pub_has_preprint stored as parquet as select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; +create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; +create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; +--create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--compute stats TARGET.indi_datasets_gold_oa; +--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--compute stats TARGET.indi_software_gold_oa; + diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/updateCache.sh 
b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/updateCache.sh new file mode 100644 index 000000000..03aa535e1 --- /dev/null +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/updateCache.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +curl --request GET $1/cache/updateCache +sleep 6h \ No newline at end of file diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml new file mode 100644 index 000000000..2bcff70ac --- /dev/null +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml @@ -0,0 +1,105 @@ + + + + stats_db_name + the target stats database name + + + stats_db_shadow_name + the name of the shadow schema + + + monitor_db_name + the target monitor db name + + + monitor_db_shadow_name + the name of the shadow monitor db + + + stats_tool_api_url + The url of the API of the stats tool. Is used to trigger the cache update. + + + hive_metastore_uris + hive server metastore URIs + + + hive_jdbc_url + hive server jdbc url + + + hive_timeout + the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a hearbeat. The default value is 300 seconds. 
+ + + context_api_url + the base url of the context api (https://services.openaire.eu/openaire) + + + + + ${jobTracker} + ${nameNode} + + + hive.metastore.uris + ${hive_metastore_uris} + + + hive.txn.timeout + ${hive_timeout} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${jobTracker} + ${nameNode} + monitor.sh + ${stats_db_name} + ${monitor_db_name} + ${monitor_db_shadow_name} + ${wf:appPath()}/scripts/createMonitorDB.sql + monitor.sh + + + + + + + + + ${jobTracker} + ${nameNode} + monitor-post.sh + ${stats_db_name} + ${monitor_db_name} + ${monitor_db_shadow_name} + monitor-post.sh + + + + + + + + ${jobTracker} + ${nameNode} + updateCache.sh + ${stats_tool_api_url} + updateCache.sh + + + + + + + \ No newline at end of file From 686580a22068b6437ff4e8dafccbf919d06e2a77 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 12 Jan 2023 11:18:03 +0200 Subject: [PATCH 05/18] - New Monitor DB workflow - New Organization added --- .../graph/monitor/oozie_app/monitor-post.sh | 21 -- .../dhp/oa/graph/monitor/oozie_app/monitor.sh | 16 +- .../oozie_app/scripts/createMonitorDB.sql | 293 ++++++------------ .../oa/graph/monitor/oozie_app/updateCache.sh | 4 - .../oa/graph/monitor/oozie_app/workflow.xml | 28 -- .../scripts/step20-createMonitorDB.sql | 3 +- 6 files changed, 107 insertions(+), 258 deletions(-) delete mode 100644 dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh delete mode 100644 dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/updateCache.sh diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh deleted file mode 100644 index b8c71681a..000000000 --- 
a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor-post.sh +++ /dev/null @@ -1,21 +0,0 @@ -export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs -export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) -if ! [ -L $link_folder ] -then - rm -Rf "$link_folder" - ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} -fi - -export SOURCE=$1 -export TARGET=$2 -export SHADOW=$3 - -impala-shell -q "invalidate metadata;" -impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - -echo "Impala shell finished" - -echo "Updating shadow monitor database" -impala-shell -q "create database if not exists ${SHADOW}" -impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f - -impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f - -echo "Shadow db ready!" 
\ No newline at end of file diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh index f39bf4893..36cfcd325 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh @@ -8,17 +8,11 @@ fi export SOURCE=$1 export TARGET=$2 -export SHADOW=$3 -export SCRIPT_PATH=$4 +export SCRIPT_PATH=$3 -export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" -export HADOOP_USER_NAME="oozie" - -echo "Getting file from " $SCRIPT_PATH -hdfs dfs -copyToLocal $SCRIPT_PATH +echo "Getting file from " $3 +hdfs dfs -copyToLocal $3 echo "Creating monitor database" -#cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo -cat createMonitorDB.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g" > foo -hive $HIVE_OPTS -f foo -echo "Hive shell finished" \ No newline at end of file +cat createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f - +echo "Impala shell finished" diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql index e9e460cb0..2c46082fa 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql +++ 
b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql @@ -1,241 +1,148 @@ -drop database if exists TARGET cascade; -create database if not exists TARGET; +DROP TABLE IF EXISTS TARGET.result_new; -create view if not exists TARGET.category as select * from SOURCE.category; -create view if not exists TARGET.concept as select * from SOURCE.concept; -create view if not exists TARGET.context as select * from SOURCE.context; -create view if not exists TARGET.country as select * from SOURCE.country; -create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp; -create view if not exists TARGET.creation_date as select * from SOURCE.creation_date; -create view if not exists TARGET.funder as select * from SOURCE.funder; -create view if not exists TARGET.fundref as select * from SOURCE.fundref; -create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture; -create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure; -create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents; -create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; -create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; -create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; - -create table TARGET.result stored as parquet as +create table TARGET.result_new stored as parquet as select distinct * from ( select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) union all select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) union all select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( - 'openorgs____::b84450f9864182c67b8611b5593f4250', 
--"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" - 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council - 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? - 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University - 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade - 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki - 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho - 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid - 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen - 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens - -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot - 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University - 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark - 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin - 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt - 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven - 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape - 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute - 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University - 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg - 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) - 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr - 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw - 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly - 
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete - 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus - 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras - 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki - 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank - 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech - 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University - 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona - 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University - 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia - 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University - 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje - 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan - 'openorgs____::b8b8ca674452579f3f593d9f5e557483' -- University College Cork +-- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork + 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University ) )) foo; -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; +COMPUTE STATS TARGET.result_new; -create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; +INSERT INTO TARGET.result select * from TARGET.result_new; -create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS; +INSERT INTO TARGET.result_citations select * from TARGET.result_citations 
orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_citations; -create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_citations_oc COMPUTE STATISTICS; +INSERT INTO TARGET.result_references_oc select * from TARGET.result_references_oc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_references_oc; -create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS; +INSERT INTO TARGET.result_citations_oc select * from TARGET.result_citations_oc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_citations_oc; -create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; +INSERT INTO TARGET.result_classifications select * from TARGET.result_classifications orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_classifications; -create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; +INSERT INTO TARGET.result_apc select * from TARGET.result_apc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_apc; -create table TARGET.result_datasources stored as parquet as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE 
TARGET.result_datasources COMPUTE STATISTICS; +INSERT INTO TARGET.result_concepts select * from TARGET.result_concepts orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_concepts; -create table TARGET.result_fundercount stored as parquet as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; +INSERT INTO TARGET.result_datasources select * from TARGET.result_datasources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_datasources; -create table TARGET.result_gold stored as parquet as select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; +INSERT INTO TARGET.result_fundercount select * from TARGET.result_fundercount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_fundercount; -create table TARGET.result_greenoa stored as parquet as select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; +INSERT INTO TARGET.result_gold select * from TARGET.result_gold orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_gold; -create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; +INSERT INTO TARGET.result_greenoa select * from TARGET.result_greenoa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_greenoa; -create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from 
TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; +INSERT INTO TARGET.result_languages select * from TARGET.result_languages orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_languages; -create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized; -ANALYZE TABLE TARGET.licenses_normalized COMPUTE STATISTICS; +INSERT INTO TARGET.result_licenses select * from TARGET.result_licenses orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_licenses; -create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS; +INSERT INTO TARGET.result_oids select * from TARGET.result_oids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_oids; -create table TARGET.result_organization stored as parquet as select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; +INSERT INTO TARGET.result_organization select * from TARGET.result_organization orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_organization; -create table TARGET.result_peerreviewed stored as parquet as select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; +INSERT INTO TARGET.result_peerreviewed select * from TARGET.result_peerreviewed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_peerreviewed; -create table TARGET.result_pids stored as parquet as select * from SOURCE.result_pids orig where exists (select 
1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; +INSERT INTO TARGET.result_pids select * from TARGET.result_pids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_pids; -create table TARGET.result_projectcount stored as parquet as select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; +INSERT INTO TARGET.result_projectcount select * from TARGET.result_projectcount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_projectcount; -create table TARGET.result_projects stored as parquet as select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; +INSERT INTO TARGET.result_projects select * from TARGET.result_projects orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_projects; -create table TARGET.result_refereed stored as parquet as select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; +INSERT INTO TARGET.result_refereed select * from TARGET.result_refereed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_refereed; -create table TARGET.result_sources stored as parquet as select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS; +INSERT INTO TARGET.result_sources select * from TARGET.result_sources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_sources; -create table TARGET.result_topics stored as parquet as select * from 
SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; +INSERT INTO TARGET.result_topics select * from TARGET.result_topics orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_topics; -create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; +INSERT INTO TARGET.result_fos select * from TARGET.result_fos orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.result_fos; -create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); -create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); -create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +create view TARGET.foo1 as select * from TARGET.result_result rr where rr.source in (select id from TARGET.result_new); +create view TARGET.foo2 as select * from TARGET.result_result rr where rr.target in (select id from TARGET.result_new); +INSERT INTO TARGET.result_result select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; drop view TARGET.foo1; drop view TARGET.foo2; -ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; - --- datasources -create view if not exists TARGET.datasource as select * from SOURCE.datasource; -create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids; -create view if not exists TARGET.datasource_organizations as select * from SOURCE.datasource_organizations; -create view if not exists TARGET.datasource_sources as select * from SOURCE.datasource_sources; - -create table 
TARGET.datasource_results stored as parquet as select id as result, datasource as id from TARGET.result_datasources; -ANALYZE TABLE TARGET.datasource_results COMPUTE STATISTICS; - --- organizations -create view if not exists TARGET.organization as select * from SOURCE.organization; -create view if not exists TARGET.organization_datasources as select * from SOURCE.organization_datasources; -create view if not exists TARGET.organization_pids as select * from SOURCE.organization_pids; -create view if not exists TARGET.organization_projects as select * from SOURCE.organization_projects; -create view if not exists TARGET.organization_sources as select * from SOURCE.organization_sources; - --- projects -create view if not exists TARGET.project as select * from SOURCE.project; -create view if not exists TARGET.project_oids as select * from SOURCE.project_oids; -create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations; -create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount; -create view if not exists TARGET.project_classification as select * from SOURCE.project_classification; - -create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; -ANALYZE TABLE TARGET.project_results COMPUTE STATISTICS; +COMPUTE STATS TARGET.result_result; -- indicators -- Sprint 1 ---- -create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; -create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; -create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * from SOURCE.indi_pub_doi_from_crossref 
orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_green_oa select * from TARGET.indi_pub_green_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_green_oa; +INSERT INTO TARGET.indi_pub_grey_lit select * from TARGET.indi_pub_grey_lit orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_grey_lit; +INSERT INTO TARGET.indi_pub_doi_from_crossref select * from TARGET.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_doi_from_crossref; -- Sprint 2 ---- -create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; -create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; -create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; -create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; +INSERT INTO TARGET.indi_result_has_cc_licence select * from TARGET.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_result_has_cc_licence; +INSERT INTO 
TARGET.indi_result_has_cc_licence_url select * from TARGET.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_result_has_cc_licence_url; +INSERT INTO TARGET.indi_pub_has_abstract select * from TARGET.indi_pub_has_abstract orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_has_abstract; +INSERT INTO TARGET.indi_result_with_orcid select * from TARGET.indi_result_with_orcid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_result_with_orcid; ---- Sprint 3 ---- -create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; -create view TARGET.indi_result_org_collab as select * from SOURCE.indi_result_org_collab; -create view TARGET.indi_result_org_country_collab as select * from SOURCE.indi_result_org_country_collab; -create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org; -create view TARGET.indi_project_collab_org_country as select * from SOURCE.indi_project_collab_org_country; -create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab; -create view TARGET.indi_result_country_collab as select * from SOURCE.indi_result_country_collab; +INSERT INTO TARGET.indi_funded_result_with_fundref select * from TARGET.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_funded_result_with_fundref; ---- Sprint 4 ---- -create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; 
-create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; -create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_diamond select * from TARGET.indi_pub_diamond orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_diamond; +INSERT INTO TARGET.indi_pub_in_transformative select * from TARGET.indi_pub_in_transformative orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_in_transformative; +INSERT INTO TARGET.indi_pub_closed_other_open select * from TARGET.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_closed_other_open; ---- Sprint 5 ---- -create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; +INSERT INTO TARGET.indi_result_no_of_copies select * from TARGET.indi_result_no_of_copies orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_result_no_of_copies; ---- Sprint 6 ---- -create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; -create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads 
orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; -create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; -create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; -create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_hybrid_oa_with_cc select * from TARGET.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_hybrid_oa_with_cc; +INSERT INTO TARGET.indi_pub_downloads select * from TARGET.indi_pub_downloads orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +COMPUTE STATS TARGET.indi_pub_downloads; +INSERT INTO TARGET.indi_pub_downloads_datasource select * from TARGET.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +COMPUTE STATS TARGET.indi_pub_downloads_datasource; +INSERT INTO TARGET.indi_pub_downloads_year select * from TARGET.indi_pub_downloads_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +COMPUTE STATS TARGET.indi_pub_downloads_year; +INSERT INTO TARGET.indi_pub_downloads_datasource_year select * from TARGET.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result_new r where 
r.id=orig.result_id); +COMPUTE STATS TARGET.indi_pub_downloads_datasource_year; ---- Sprint 7 ---- -create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; -create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; -create view TARGET.indi_org_fairness as select * from SOURCE.indi_org_fairness; -create view TARGET.indi_org_fairness_pub_pr as select * from SOURCE.indi_org_fairness_pub_pr; -create view TARGET.indi_org_fairness_pub_year as select * from SOURCE.indi_org_fairness_pub_year; -create view TARGET.indi_org_fairness_pub as select * from SOURCE.indi_org_fairness_pub; -create view TARGET.indi_org_fairness_year as select * from SOURCE.indi_org_fairness_year; -create view TARGET.indi_org_findable_year as select * from SOURCE.indi_org_findable_year; -create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable; -create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; -create view TARGET.indi_org_openess_year as select * from SOURCE.indi_org_openess_year; -create table TARGET.indi_pub_has_preprint stored as parquet as select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; -create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; -create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where 
r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_gold_oa select * from TARGET.indi_pub_gold_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_gold_oa; +INSERT INTO TARGET.indi_pub_hybrid select * from TARGET.indi_pub_hybrid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_hybrid; + +INSERT INTO TARGET.indi_pub_has_preprint select * from TARGET.indi_pub_has_preprint orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_has_preprint; +INSERT INTO TARGET.indi_pub_in_subscribed select * from TARGET.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_pub_in_subscribed; +INSERT INTO TARGET.indi_result_with_pid select * from TARGET.indi_result_with_pid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +COMPUTE STATS TARGET.indi_result_with_pid; --create table TARGET.indi_datasets_gold_oa stored as parquet as select * from SOURCE.indi_datasets_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); --compute stats TARGET.indi_datasets_gold_oa; --create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); --compute stats TARGET.indi_software_gold_oa; - +DROP TABLE TARGET.result_new; diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/updateCache.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/updateCache.sh deleted file mode 100644 index 03aa535e1..000000000 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/updateCache.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env bash 
- -curl --request GET $1/cache/updateCache -sleep 6h \ No newline at end of file diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml index 2bcff70ac..dda645d8f 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/workflow.xml @@ -66,37 +66,9 @@ monitor.sh ${stats_db_name} ${monitor_db_name} - ${monitor_db_shadow_name} ${wf:appPath()}/scripts/createMonitorDB.sql monitor.sh - - - - - - - - ${jobTracker} - ${nameNode} - monitor-post.sh - ${stats_db_name} - ${monitor_db_name} - ${monitor_db_shadow_name} - monitor-post.sh - - - - - - - - ${jobTracker} - ${nameNode} - updateCache.sh - ${stats_tool_api_url} - updateCache.sh - diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 3e69ff58d..885f7e4f7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -60,7 +60,8 @@ create table TARGET.result stored as parquet as 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan - 'openorgs____::b8b8ca674452579f3f593d9f5e557483' -- University College Cork + 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College 
Cork + 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University ) )) foo; compute stats TARGET.result; From 51f7ab5864ca80023b1cc7d350e4dda38dd1cd99 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 12 Jan 2023 17:15:06 +0200 Subject: [PATCH 06/18] Bug fixes --- .../oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql index 2c46082fa..7e1333f92 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql @@ -2,11 +2,7 @@ DROP TABLE IF EXISTS TARGET.result_new; create table TARGET.result_new stored as parquet as select distinct * from ( - select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) - union all - select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) - union all - select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( + select * from result r where exists (select 1 from result_organization ro where ro.id=r.id and ro.organization in ( -- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University ) )) foo; From dd70c32ad7ab3fccc38dd6c42c04b6ca2c6d15b1 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 12 Jan 2023 17:18:05 +0200 Subject: [PATCH 07/18] Bug fixes --- .../dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql index 7e1333f92..2eb95294a 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS TARGET.result_new; create table TARGET.result_new stored as parquet as select distinct * from ( - select * from result r where exists (select 1 from result_organization ro where ro.id=r.id and ro.organization in ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( -- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University ) )) foo; From db7d625ba9b436060e8be56452215945d80efca6 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Wed, 25 Jan 2023 12:22:21 +0200 Subject: [PATCH 08/18] =?UTF-8?q?Addedd=20Arts=20et=20M=C3=A9tiers=20Paris?= =?UTF-8?q?Tech=20organization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh | 2 +- .../oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql | 6 ++++-- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh index 36cfcd325..10c1ed4ca 100644 --- 
a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/monitor.sh @@ -13,6 +13,6 @@ export SCRIPT_PATH=$3 echo "Getting file from " $3 hdfs dfs -copyToLocal $3 -echo "Creating monitor database" +echo "Updating monitor database" cat createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f - echo "Impala shell finished" diff --git a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql index 2eb95294a..265610e90 100644 --- a/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql +++ b/dhp-workflows/dhp-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/monitor/oozie_app/scripts/createMonitorDB.sql @@ -1,15 +1,17 @@ DROP TABLE IF EXISTS TARGET.result_new; -create table TARGET.result_new stored as parquet as +create table TARGET.result_new as select distinct * from ( select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( -- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork - 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University +-- 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University + 'openorgs____::57784c9e047e826fefdb1ef816120d92' --Arts et Métiers ParisTech ) )) foo; COMPUTE STATS TARGET.result_new; INSERT INTO TARGET.result select * from TARGET.result_new; +COMPUTE STATS TARGET.result; INSERT INTO TARGET.result_citations select * from TARGET.result_citations orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); COMPUTE STATS TARGET.result_citations; diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 885f7e4f7..c6b7d8ae2 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -61,7 +61,8 @@ create table TARGET.result stored as parquet as 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork - 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University + 'openorgs____::38d7097854736583dde879d12dacafca', -- Brown University + 'openorgs____::57784c9e047e826fefdb1ef816120d92' --Arts et Métiers ParisTech ) )) foo; compute stats TARGET.result; From 973d78a4d64718dbeb16ca4857f99419bdfade18 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 2 Feb 2023 08:03:54 +0200 Subject: [PATCH 09/18] Update step15_5.sql Added unpaywalls open access colors --- .../dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql index 584de0a56..753d61ca0 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql @@ -39,7 +39,7 @@ create view 
${stats_db_name}.hrrst as select * from stats_ext.hrrst; create table ${stats_db_name}.result_instance stored as parquet as select distinct r.* from ( - select substr(r.id, 4) as id, inst.accessright.classname as accessright, substr(inst.collectedfrom.key, 4) as collectedfrom, + select substr(r.id, 4) as id, inst.accessright.classname as accessright, inst.accessright.openaccessroute as accessright_uw, substr(inst.collectedfrom.key, 4) as collectedfrom, substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r join ${stats_db_name}.result res on res.id=r.id; From 98c34263ed54655ca028753552a496392198795d Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Tue, 7 Feb 2023 08:14:48 +0200 Subject: [PATCH 10/18] Update step20-createMonitorDB.sql Add University of Cape Town organization --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index c6b7d8ae2..237f68fae 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -62,8 +62,9 @@ create table TARGET.result stored as parquet as 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork 'openorgs____::38d7097854736583dde879d12dacafca', -- Brown University 
- 'openorgs____::57784c9e047e826fefdb1ef816120d92' --Arts et Métiers ParisTech - ) )) foo; + 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech + 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e' -- University of Cape Town + ))) foo; compute stats TARGET.result; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); From 85e53fad004386fe295751caebe43ad0b7777830 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 9 Feb 2023 18:59:45 +0100 Subject: [PATCH 11/18] [UsageCount] addition of usagecount for Projects and datasources. Extention of the action set created for the results with new entities for projects and datasources. Extention of the resource set and modification of the testing class --- .../usagestats/SparkAtomicActionUsageJob.java | 103 +++++++++++++----- .../usagestats/UsageStatsModel.java | 10 +- .../usagestats/oozie_app/workflow.xml | 2 +- .../SparkAtomicActionCountJobTest.java | 45 +++++--- .../dhp/actionmanager/usagestats/datasourceDb | 12 ++ .../dhp/actionmanager/usagestats/projectDb | 12 ++ .../dhp/actionmanager/usagestats/usageDb | 12 ++ .../dhp/actionmanager/usagestats/usagestatsdb | 12 -- pom.xml | 2 +- 9 files changed, 143 insertions(+), 67 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 5f099b8f2..3dbda3788 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -9,6 +9,8 @@ import java.util.Arrays; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.schema.common.MainEntityType; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; @@ -28,9 +30,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Measure; -import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; @@ -76,41 +75,37 @@ public class SparkAtomicActionUsageJob implements Serializable { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareResults(dbname, spark, workingPath); + prepareData(dbname, spark, workingPath + "/usageDb", "usage_stats"); + prepareData(dbname, spark, workingPath + "/projectDb", "project_stats"); + prepareData(dbname, spark, workingPath + "/datasourceDb", "datasource_stats"); writeActionSet(spark, workingPath, outputPath); }); } - public static void prepareResults(String db, SparkSession spark, String workingPath) { + private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName) { spark - .sql( - "Select result_id, downloads, views " + - "from " + db + ".usage_stats") - .as(Encoders.bean(UsageStatsModel.class)) - .write() - .mode(SaveMode.Overwrite) - 
.option("compression", "gzip") - .json(workingPath); + .sql( + "Select result_id, downloads, views " + + "from " + dbname + "." + tableName) + .as(Encoders.bean(UsageStatsModel.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath); } + + public static void writeActionSet(SparkSession spark, String inputPath, String outputPath) { - readPath(spark, inputPath, UsageStatsModel.class) - .groupByKey((MapFunction) us -> us.getResult_id(), Encoders.STRING()) - .mapGroups((MapGroupsFunction) (k, it) -> { - UsageStatsModel first = it.next(); - it.forEachRemaining(us -> { - first.setDownloads(first.getDownloads() + us.getDownloads()); - first.setViews(first.getViews() + us.getViews()); - }); - - Result res = new Result(); - res.setId("50|" + k); - - res.setMeasures(getMeasure(first.getDownloads(), first.getViews())); - return res; - }, Encoders.bean(Result.class)) - .toJavaRDD() - .map(p -> new AtomicAction(p.getClass(), p)) + getFinalIndicatorsResult(spark, inputPath+ "/usageDb"). + toJavaRDD(). 
+ map(p -> new AtomicAction(p.getClass(),p)) + .union(getFinalIndicatorsProject(spark, inputPath + "/projectDb") + .toJavaRDD() + .map(p -> new AtomicAction(p.getClass(), p ))) + .union(getFinalIndicatorsDatasource(spark, inputPath + "/datasourceDb") + .toJavaRDD() + .map(p -> new AtomicAction(p.getClass(), p))) .mapToPair( aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), new Text(OBJECT_MAPPER.writeValueAsString(aa)))) @@ -118,6 +113,54 @@ public class SparkAtomicActionUsageJob implements Serializable { } + private static Dataset getFinalIndicatorsResult(SparkSession spark, String inputPath) { + + return getUsageStatsModelDataset(spark, inputPath) + .map((MapFunction) usm -> { + Result r = new Result(); + r.setId("50|" + usm.getId()); + r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews())); + return r; + }, Encoders.bean(Result.class)); + } + + private static Dataset getFinalIndicatorsProject(SparkSession spark, String inputPath) { + + return getUsageStatsModelDataset(spark, inputPath) + .map((MapFunction) usm -> { + Project r = new Project(); + r.setId("40|" + usm.getId()); + r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews())); + return r; + }, Encoders.bean(Project.class)); + } + + private static Dataset getFinalIndicatorsDatasource(SparkSession spark, String inputPath) { + + return getUsageStatsModelDataset(spark, inputPath) + .map((MapFunction) usm -> { + Datasource r = new Datasource(); + r.setId("10|" + usm.getId()); + r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews())); + return r; + }, Encoders.bean(Datasource.class)); + } + + private static Dataset getUsageStatsModelDataset(SparkSession spark, String inputPath) { + return readPath(spark, inputPath, UsageStatsModel.class) + .groupByKey((MapFunction) us -> us.getId(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + UsageStatsModel first = it.next(); + it.forEachRemaining(us -> { + first.setDownloads(first.getDownloads() + us.getDownloads()); + 
first.setViews(first.getViews() + us.getViews()); + }); + first.setId(k); + return first; + + }, Encoders.bean(UsageStatsModel.class)); + } + private static List getMeasure(Long downloads, Long views) { DataInfo dataInfo = OafMapperUtils .dataInfo( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsModel.java index df8a77eb6..07f69b0bb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsModel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/UsageStatsModel.java @@ -4,16 +4,16 @@ package eu.dnetlib.dhp.actionmanager.usagestats; import java.io.Serializable; public class UsageStatsModel implements Serializable { - private String result_id; + private String id; private Long downloads; private Long views; - public String getResult_id() { - return result_id; + public String getId() { + return id; } - public void setResult_id(String result_id) { - this.result_id = result_id; + public void setId(String id) { + this.id = id; } public Long getDownloads() { diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml index d94cf7d53..de188718a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/usagestats/oozie_app/workflow.xml @@ -89,7 +89,7 @@ --hive_metastore_uris${hiveMetastoreUris} --outputPath${outputPath} --usagestatsdb${usagestatsdb} - --workingPath${workingDir}/usageDb + --workingPath${workingDir} diff --git 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java index 8aa718bae..8ff3c36f7 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java @@ -8,6 +8,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import org.apache.commons.io.FileUtils; import org.apache.hadoop.io.Text; import org.apache.spark.SparkConf; @@ -68,24 +69,26 @@ public class SparkAtomicActionCountJobTest { @Test void testMatch() { String usageScoresPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb") + .getResource("/eu/dnetlib/dhp/actionmanager/usagestats") .getPath(); SparkAtomicActionUsageJob.writeActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet"); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - JavaRDD tmp = sc + JavaRDD tmp = sc .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) - .map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class)) - .map(aa -> (Result) aa.getPayload()); + .map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class)); + //.map(aa -> (Result) aa.getPayload()); - Assertions.assertEquals(9, tmp.count()); + Assertions.assertEquals(9,tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("50|")).count()); + Assertions.assertEquals(9,tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|")).count()); + Assertions.assertEquals(9,tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|")).count()); - tmp.foreach(r -> 
Assertions.assertEquals(2, r.getMeasures().size())); + tmp.foreach(r -> Assertions.assertEquals(2, ((OafEntity)r.getPayload()).getMeasures().size())); tmp .foreach( - r -> r + r -> ((OafEntity)r.getPayload()) .getMeasures() .stream() .forEach( @@ -95,14 +98,14 @@ public class SparkAtomicActionCountJobTest { .forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference())))); tmp .foreach( - r -> r + r -> ((OafEntity)r.getPayload()) .getMeasures() .stream() .forEach( m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred())))); tmp .foreach( - r -> r + r -> ((OafEntity)r.getPayload()) .getMeasures() .stream() .forEach( @@ -113,7 +116,7 @@ public class SparkAtomicActionCountJobTest { tmp .foreach( - r -> r + r -> ((OafEntity)r.getPayload()) .getMeasures() .stream() .forEach( @@ -127,7 +130,7 @@ public class SparkAtomicActionCountJobTest { u.getDataInfo().getProvenanceaction().getClassid())))); tmp .foreach( - r -> r + r -> ((OafEntity)r.getPayload()) .getMeasures() .stream() .forEach( @@ -142,7 +145,7 @@ public class SparkAtomicActionCountJobTest { tmp .foreach( - r -> r + r -> ((OafEntity)r.getPayload()) .getMeasures() .stream() .forEach( @@ -157,12 +160,13 @@ public class SparkAtomicActionCountJobTest { Assertions .assertEquals( - 1, tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count()); + 1, tmp.filter(r -> ((OafEntity)r.getPayload()).getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count()); Assertions .assertEquals( "0", tmp + .map(r -> ((OafEntity)r.getPayload())) .filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")) .collect() .get(0) @@ -178,7 +182,8 @@ public class SparkAtomicActionCountJobTest { .assertEquals( "5", tmp - .filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")) + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> 
r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")) .collect() .get(0) .getMeasures() @@ -194,7 +199,8 @@ public class SparkAtomicActionCountJobTest { .assertEquals( "0", tmp - .filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")) + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")) .collect() .get(0) .getMeasures() @@ -209,7 +215,8 @@ public class SparkAtomicActionCountJobTest { .assertEquals( "1", tmp - .filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")) + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")) .collect() .get(0) .getMeasures() @@ -225,7 +232,8 @@ public class SparkAtomicActionCountJobTest { .assertEquals( "2", tmp - .filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")) + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")) .collect() .get(0) .getMeasures() @@ -240,7 +248,8 @@ public class SparkAtomicActionCountJobTest { .assertEquals( "6", tmp - .filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")) + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")) .collect() .get(0) .getMeasures() diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb new file mode 100644 index 000000000..7337ba3e2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb @@ -0,0 +1,12 @@ +{"id":"d1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} 
+{"id":"d1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"d11_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} +{"id":"d11_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} +{"id":"d11_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} +{"id":"d12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} +{"id":"d12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"d12_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} +{"id":"d12_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} +{"id":"d13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} +{"id":"d13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"d13_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb new file mode 100644 index 000000000..0ecab2a82 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb @@ -0,0 +1,12 @@ +{"id":"f1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} +{"id":"f1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"f11_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} +{"id":"f11_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} +{"id":"f11_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} +{"id":"f12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} +{"id":"f12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"f12_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} 
+{"id":"f12_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} +{"id":"f13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} +{"id":"f13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"f13_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb new file mode 100644 index 000000000..eb3290eda --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb @@ -0,0 +1,12 @@ +{"id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} +{"id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} +{"id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} +{"id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} +{"id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} +{"id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} +{"id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} +{"id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} +{"id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb deleted file mode 100644 index 
fee74f697..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb +++ /dev/null @@ -1,12 +0,0 @@ -{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} -{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} -{"result_id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} -{"result_id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} -{"result_id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} -{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} -{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} -{"result_id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} -{"result_id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} -{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} -{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} -{"result_id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 9b60b9078..839188a62 100644 --- a/pom.xml +++ b/pom.xml @@ -807,7 +807,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [3.15.0] + [3.15.1-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From bb5bba51b33bec386270170f0af6eadc7734ae57 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 9 Feb 2023 19:08:30 +0100 Subject: [PATCH 12/18] [UsageCount] extended test --- .../SparkAtomicActionCountJobTest.java | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java index 8ff3c36f7..1512f9cf8 
100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionCountJobTest.java @@ -260,6 +260,206 @@ public class SparkAtomicActionCountJobTest { .getUnit() .get(0) .getValue()); + + + Assertions + .assertEquals( + "0", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "5", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + "0", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "1", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + "2", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> 
m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "6", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + + Assertions + .assertEquals( + "0", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "5", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + "0", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "1", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + "2", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> 
r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("downloads")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "6", + tmp + .map(r -> ((OafEntity)r.getPayload())) + .filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f")) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(m -> m.getId().equals("views")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); } } From f803530df6785c8366bbf32943cc5b17a579bac7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 10 Feb 2023 15:50:56 +0100 Subject: [PATCH 13/18] [UsageCount] fixed query --- .../usagestats/SparkAtomicActionUsageJob.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 3dbda3788..f99f84265 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -75,17 +75,17 @@ public class SparkAtomicActionUsageJob implements Serializable { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - prepareData(dbname, spark, workingPath + "/usageDb", "usage_stats"); - prepareData(dbname, spark, workingPath + "/projectDb", "project_stats"); - prepareData(dbname, spark, workingPath + "/datasourceDb", "datasource_stats"); + prepareData(dbname, spark, workingPath + "/usageDb", "usage_stats", "result_id"); + prepareData(dbname, spark, workingPath + "/projectDb", "project_stats", "id"); + prepareData(dbname, spark, 
workingPath + "/datasourceDb", "datasource_stats", "repositor_id"); writeActionSet(spark, workingPath, outputPath); }); } - private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName) { + private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName, String attribute_name) { spark .sql( - "Select result_id, downloads, views " + + "Select " + attribute_name + " as id, downloads, views " + "from " + dbname + "." + tableName) .as(Encoders.bean(UsageStatsModel.class)) .write() From 5cf902a2b098185cc013fd6065366725351b5d02 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 10 Feb 2023 16:16:37 +0100 Subject: [PATCH 14/18] [UsageCount] changed query to make the sum be computed via sql instead of grouping --- .../usagestats/SparkAtomicActionUsageJob.java | 28 +++++-------------- .../dhp/actionmanager/usagestats/datasourceDb | 9 ++---- .../actionmanager/usagestats/datasourceDb_old | 12 ++++++++ .../dhp/actionmanager/usagestats/projectDb | 9 ++---- .../actionmanager/usagestats/projectDb_old | 12 ++++++++ .../dhp/actionmanager/usagestats/usageDb | 9 ++---- .../dhp/actionmanager/usagestats/usageDb_old | 12 ++++++++ 7 files changed, 52 insertions(+), 39 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb_old create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb_old create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb_old diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index f99f84265..d7ca00708 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -8,15 +8,13 @@ import java.io.Serializable; import java.util.Arrays; import java.util.List; import java.util.Optional; - -import eu.dnetlib.dhp.schema.common.MainEntityType; import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; + import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -85,8 +83,9 @@ public class SparkAtomicActionUsageJob implements Serializable { private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName, String attribute_name) { spark .sql( - "Select " + attribute_name + " as id, downloads, views " + - "from " + dbname + "." + tableName) + "Select " + attribute_name + " as id, sum(downloads) as downloads, sum(views) as views " + + "from " + dbname + "." 
+ tableName + + "group by " + attribute_name) .as(Encoders.bean(UsageStatsModel.class)) .write() .mode(SaveMode.Overwrite) @@ -115,7 +114,7 @@ public class SparkAtomicActionUsageJob implements Serializable { private static Dataset getFinalIndicatorsResult(SparkSession spark, String inputPath) { - return getUsageStatsModelDataset(spark, inputPath) + return readPath(spark, inputPath, UsageStatsModel.class) .map((MapFunction) usm -> { Result r = new Result(); r.setId("50|" + usm.getId()); @@ -126,7 +125,7 @@ public class SparkAtomicActionUsageJob implements Serializable { private static Dataset getFinalIndicatorsProject(SparkSession spark, String inputPath) { - return getUsageStatsModelDataset(spark, inputPath) + return readPath(spark, inputPath, UsageStatsModel.class) .map((MapFunction) usm -> { Project r = new Project(); r.setId("40|" + usm.getId()); @@ -137,7 +136,7 @@ public class SparkAtomicActionUsageJob implements Serializable { private static Dataset getFinalIndicatorsDatasource(SparkSession spark, String inputPath) { - return getUsageStatsModelDataset(spark, inputPath) + return readPath(spark, inputPath, UsageStatsModel.class) .map((MapFunction) usm -> { Datasource r = new Datasource(); r.setId("10|" + usm.getId()); @@ -146,20 +145,7 @@ public class SparkAtomicActionUsageJob implements Serializable { }, Encoders.bean(Datasource.class)); } - private static Dataset getUsageStatsModelDataset(SparkSession spark, String inputPath) { - return readPath(spark, inputPath, UsageStatsModel.class) - .groupByKey((MapFunction) us -> us.getId(), Encoders.STRING()) - .mapGroups((MapGroupsFunction) (k, it) -> { - UsageStatsModel first = it.next(); - it.forEachRemaining(us -> { - first.setDownloads(first.getDownloads() + us.getDownloads()); - first.setViews(first.getViews() + us.getViews()); - }); - first.setId(k); - return first; - }, Encoders.bean(UsageStatsModel.class)); - } private static List getMeasure(Long downloads, Long views) { DataInfo dataInfo = OafMapperUtils diff 
--git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb index 7337ba3e2..efbb4cfbd 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb @@ -1,12 +1,9 @@ -{"id":"d1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} -{"id":"d1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"d1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":5} {"id":"d11_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} {"id":"d11_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} {"id":"d11_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} -{"id":"d12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} -{"id":"d12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"d12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":6} {"id":"d12_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} {"id":"d12_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} -{"id":"d13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} -{"id":"d13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"d13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":10} {"id":"d13_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb_old b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb_old new file mode 100644 index 000000000..7337ba3e2 --- 
/dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/datasourceDb_old @@ -0,0 +1,12 @@ +{"id":"d1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} +{"id":"d1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"d11_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} +{"id":"d11_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} +{"id":"d11_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} +{"id":"d12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} +{"id":"d12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"d12_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} +{"id":"d12_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} +{"id":"d13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} +{"id":"d13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"d13_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb index 0ecab2a82..0b8cd1d70 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb @@ -1,12 +1,9 @@ -{"id":"f1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} -{"id":"f1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"f1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":5} {"id":"f11_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} 
{"id":"f11_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} {"id":"f11_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} -{"id":"f12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} -{"id":"f12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"f12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":6} {"id":"f12_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} {"id":"f12_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} -{"id":"f13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} -{"id":"f13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"f13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":10} {"id":"f13_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb_old b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb_old new file mode 100644 index 000000000..0ecab2a82 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/projectDb_old @@ -0,0 +1,12 @@ +{"id":"f1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} +{"id":"f1__________::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"f11_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} +{"id":"f11_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} +{"id":"f11_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} +{"id":"f12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} +{"id":"f12_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"f12_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} 
+{"id":"f12_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} +{"id":"f13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} +{"id":"f13_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"f13_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb index eb3290eda..495ae0fc5 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb @@ -1,12 +1,9 @@ -{"id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} -{"id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":5} {"id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} {"id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} {"id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} -{"id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} -{"id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":6} {"id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} {"id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} -{"id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} -{"id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":10} 
{"id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb_old b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb_old new file mode 100644 index 000000000..eb3290eda --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/usagestats/usageDb_old @@ -0,0 +1,12 @@ +{"id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4} +{"id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1} +{"id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1} +{"id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3} +{"id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1} +{"id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3} +{"id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3} +{"id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1} +{"id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3} +{"id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8} +{"id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2} +{"id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3} \ No newline at end of file From 554df257abfdf5a979bb733f8e25c3231c5ddf01 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Mon, 13 Feb 2023 16:05:58 +0100 Subject: [PATCH 15/18] null values in date range conditions --- .../dhp/broker/oa/util/SubscriptionUtils.java | 18 +++++++++++++++--- .../broker/oa/util/SubscriptionUtilsTest.java | 12 ++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java index cf3562193..4792a7719 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java @@ -37,12 +37,24 @@ public class SubscriptionUtils { } public static boolean verifyDateRange(final long date, final String min, final String max) { + + long from = 0; + long to = Long.MAX_VALUE; + try { - return date >= DateUtils.parseDate(min, "yyyy-MM-dd").getTime() - && date < DateUtils.parseDate(max, "yyyy-MM-dd").getTime() + ONE_DAY; + from = min != null ? DateUtils.parseDate(min, "yyyy-MM-dd").getTime() : 0; } catch (final ParseException e) { - return false; + from = 0; } + + try { + to = max != null ? DateUtils.parseDate(max, "yyyy-MM-dd").getTime() + ONE_DAY : Long.MAX_VALUE; + } catch (final ParseException e) { + to = Long.MAX_VALUE; + } + + return date >= from && date < to; + } public static boolean verifyExact(final String s1, final String s2) { diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java index d93390e4a..63b49d362 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtilsTest.java @@ -41,6 +41,18 @@ public class SubscriptionUtilsTest { assertTrue(SubscriptionUtils.verifyDateRange(date, "2010-01-01", "2011-01-01")); assertFalse(SubscriptionUtils.verifyDateRange(date, "2020-01-01", "2021-01-01")); + + assertTrue(SubscriptionUtils.verifyDateRange(date, "2010-01-01", "NULL")); + 
assertTrue(SubscriptionUtils.verifyDateRange(date, "2010-01-01", null)); + assertTrue(SubscriptionUtils.verifyDateRange(date, "NULL", "2011-01-01")); + assertTrue(SubscriptionUtils.verifyDateRange(date, null, "2011-01-01")); + assertTrue(SubscriptionUtils.verifyDateRange(date, "NULL", "NULL")); + assertTrue(SubscriptionUtils.verifyDateRange(date, null, null)); + + assertFalse(SubscriptionUtils.verifyDateRange(date, "2020-01-01", null)); + assertFalse(SubscriptionUtils.verifyDateRange(date, "2020-01-01", "NULL")); + assertFalse(SubscriptionUtils.verifyDateRange(date, null, "2005-01-01")); + assertFalse(SubscriptionUtils.verifyDateRange(date, "NULL", "2005-01-01")); } @Test From 9a03f71db1ab600743ed34f05d5fe4d4f750ca26 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 13 Feb 2023 16:25:47 +0100 Subject: [PATCH 16/18] code formatting --- .../oaf/utils/GraphCleaningFunctions.java | 4 +- .../ebi/SparkCreateBaselineDataFrame.scala | 3 +- .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 32 +++++++++------ .../crossref/CrossrefMappingTest.scala | 4 +- .../orcid/MappingORCIDToOAFTest.scala | 8 ++-- .../doiboost/uw/UnpayWallMappingTest.scala | 4 +- .../dhp/oa/graph/clean/CleanCountryTest.java | 41 +++++++++---------- .../resolution/ResolveEntitiesTest.scala | 8 ++-- .../sx/graph/scholix/ScholixGraphTest.scala | 5 ++- 9 files changed, 61 insertions(+), 48 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index b24daaa5d..fc515b5b1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -13,6 +13,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; +import org.apache.spark.api.java.function.MapFunction; +import 
org.apache.spark.sql.Encoders; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; @@ -23,8 +25,6 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import me.xuender.unidecode.Unidecode; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Encoders; public class GraphCleaningFunctions extends CleaningFunctions { diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 87116f00a..8ac8b00bf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -27,7 +27,8 @@ object SparkCreateBaselineDataFrame { def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = { val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") - val result = data.linesWithSeparators.map(l =>l.stripLineEnd) + val result = data.linesWithSeparators + .map(l => l.stripLineEnd) .filter(l => l.startsWith("") diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index 24caaa553..d1611300d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -63,7 +63,9 @@ class BioScholixTest extends AbstractVocabularyTest { val records: String = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump")) .mkString - val r: List[Oaf] = 
records.linesWithSeparators.map(l =>l.stripLineEnd).toList + val r: List[Oaf] = records.linesWithSeparators + .map(l => l.stripLineEnd) + .toList .map(s => mapper.readValue(s, classOf[PMArticle])) .map(a => PubMedToOaf.convert(a, vocabularies)) assertEquals(10, r.size) @@ -173,9 +175,10 @@ class BioScholixTest extends AbstractVocabularyTest { val records: String = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pdb_dump")) .mkString - records.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) + records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) - val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) + val result: List[Oaf] = + records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.pdbTOOaf(o)) assertTrue(result.nonEmpty) result.foreach(r => assertNotNull(r)) @@ -194,9 +197,10 @@ class BioScholixTest extends AbstractVocabularyTest { val records: String = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump")) .mkString - records.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) + records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) - val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o)) + val result: List[Oaf] = + records.linesWithSeparators.map(l => l.stripLineEnd).toList.flatMap(o => BioDBToOAF.uniprotToOAF(o)) assertTrue(result.nonEmpty) result.foreach(r => assertNotNull(r)) @@ -239,9 +243,10 @@ class BioScholixTest extends AbstractVocabularyTest { val records: String = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/crossref_links")) .mkString - records.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) + 
records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) - val result: List[Oaf] = records.linesWithSeparators.map(l =>l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList + val result: List[Oaf] = + records.linesWithSeparators.map(l => l.stripLineEnd).map(s => BioDBToOAF.crossrefLinksToOaf(s)).toList assertNotNull(result) assertTrue(result.nonEmpty) @@ -276,14 +281,17 @@ class BioScholixTest extends AbstractVocabularyTest { getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved") ) .mkString - records.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) + records.linesWithSeparators.map(l => l.stripLineEnd).foreach(s => assertTrue(s.nonEmpty)) implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - val l: List[ScholixResolved] = records.linesWithSeparators.map(l =>l.stripLineEnd).map { input => - lazy val json = parse(input) - json.extract[ScholixResolved] - }.toList + val l: List[ScholixResolved] = records.linesWithSeparators + .map(l => l.stripLineEnd) + .map { input => + lazy val json = parse(input) + json.extract[ScholixResolved] + } + .toList val result: List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index aba8cee12..12a61454d 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -31,13 +31,13 @@ class CrossrefMappingTest { .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/funder_doi")) .mkString - for (line <- funder_doi.linesWithSeparators.map(l =>l.stripLineEnd)) { + for (line <- funder_doi.linesWithSeparators.map(l => 
l.stripLineEnd)) { val json = template.replace("%s", line) val resultList: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(resultList.nonEmpty) checkRelation(resultList) } - for (line <- funder_name.linesWithSeparators.map(l =>l.stripLineEnd)) { + for (line <- funder_name.linesWithSeparators.map(l => l.stripLineEnd)) { val json = template.replace("%s", line) val resultList: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(resultList.nonEmpty) diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala index d7a6a94a5..8033f02fb 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -25,9 +25,11 @@ class MappingORCIDToOAFTest { .mkString assertNotNull(json) assertFalse(json.isEmpty) - json.linesWithSeparators.map(l =>l.stripLineEnd).foreach(s => { - assertNotNull(ORCIDToOAF.extractValueFromInputString(s)) - }) + json.linesWithSeparators + .map(l => l.stripLineEnd) + .foreach(s => { + assertNotNull(ORCIDToOAF.extractValueFromInputString(s)) + }) } @Test diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala index 7fe0e9935..30001acb5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/uw/UnpayWallMappingTest.scala @@ -22,7 +22,7 @@ class UnpayWallMappingTest { .mkString var i: Int = 0 - for (line <- Ilist.linesWithSeparators.map(l =>l.stripLineEnd)) { + for (line <- Ilist.linesWithSeparators.map(l => l.stripLineEnd)) { val p = UnpayWallToOAF.convertToOAF(line) if (p != 
null) { @@ -43,7 +43,7 @@ class UnpayWallMappingTest { i = i + 1 } - val l = Ilist.linesWithSeparators.map(l =>l.stripLineEnd).next() + val l = Ilist.linesWithSeparators.map(l => l.stripLineEnd).next() val item = UnpayWallToOAF.convertToOAF(l) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java index de9e4fc90..3bc69cfd1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -27,6 +26,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob; +import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Publication; public class CleanCountryTest { @@ -151,41 +151,40 @@ public class CleanCountryTest { @Test public void testDatasetClean() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/clean/dataset_clean_country.json") + .getPath(); spark - .read() - .textFile(sourcePath) - .map( - (MapFunction) r -> OBJECT_MAPPER.readValue(r, Dataset.class), - Encoders.bean(Dataset.class)) - .write() - .json(workingDir.toString() + "/dataset"); + .read() + .textFile(sourcePath) + .map( + (MapFunction) r -> OBJECT_MAPPER.readValue(r, Dataset.class), + Encoders.bean(Dataset.class)) + .write() + .json(workingDir.toString() + "/dataset"); CleanCountrySparkJob.main(new 
String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--inputPath", workingDir.toString() + "/dataset", - "-graphTableClassName", Dataset.class.getCanonicalName(), - "-workingDir", workingDir.toString() + "/working", - "-country", "NL", - "-verifyParam", "10.17632", - "-collectedfrom", "NARCIS", - "-hostedBy", getClass() + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", workingDir.toString() + "/dataset", + "-graphTableClassName", Dataset.class.getCanonicalName(), + "-workingDir", workingDir.toString() + "/working", + "-country", "NL", + "-verifyParam", "10.17632", + "-collectedfrom", "NARCIS", + "-hostedBy", getClass() .getResource("/eu/dnetlib/dhp/oa/graph/clean/hostedBy") .getPath() }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/dataset") - .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + .textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); Assertions.assertEquals(1, tmp.count()); Assertions.assertEquals(0, tmp.first().getCountry().size()); - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index d415b7fc9..022168de5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -53,7 +53,8 @@ class ResolveEntitiesTest extends Serializable { def generateUpdates(spark: SparkSession): Unit = { val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString - val pids: List[String] = template.linesWithSeparators.map(l =>l.stripLineEnd) + val pids: List[String] = 
template.linesWithSeparators + .map(l => l.stripLineEnd) .map { id => val r = new Result r.setId(id.toLowerCase.trim) @@ -127,7 +128,7 @@ class ResolveEntitiesTest extends Serializable { entities.foreach { e => val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString spark - .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l =>l.stripLineEnd).toList)) + .createDataset(spark.sparkContext.parallelize(template.linesWithSeparators.map(l => l.stripLineEnd).toList)) .as[String] .write .option("compression", "gzip") @@ -264,7 +265,8 @@ class ResolveEntitiesTest extends Serializable { Source .fromInputStream(this.getClass.getResourceAsStream(s"publication")) .mkString - .linesWithSeparators.map(l =>l.stripLineEnd) + .linesWithSeparators + .map(l => l.stripLineEnd) .next(), classOf[Publication] ) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index 0ea908290..b838ae065 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -47,7 +47,7 @@ class ScholixGraphTest extends AbstractVocabularyTest { val inputRelations = Source .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")) .mkString - val items = inputRelations.linesWithSeparators.map(l =>l.stripLineEnd).toList + val items = inputRelations.linesWithSeparators.map(l => l.stripLineEnd).toList assertNotNull(items) items.foreach(i => assertTrue(i.nonEmpty)) val result = @@ -69,7 +69,8 @@ class ScholixGraphTest extends AbstractVocabularyTest { getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/merge_result_scholix") ) .mkString - val result: List[(Relation, ScholixSummary)] = 
inputRelations.linesWithSeparators.map(l =>l.stripLineEnd) + val result: List[(Relation, ScholixSummary)] = inputRelations.linesWithSeparators + .map(l => l.stripLineEnd) .sliding(2) .map(s => (s.head, s(1))) .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))) From a8ac79fa25636a5a7995c148f5536d342feafbc6 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 15 Feb 2023 10:29:13 +0100 Subject: [PATCH 17/18] Added citation relation on crossref Mapping --- .../doiboost/crossref/Crossref2Oaf.scala | 49 + .../crossref/publication_license_embargo.json | 3006 ++++++++--------- .../crossref/CrossrefMappingTest.scala | 43 + 3 files changed, 1564 insertions(+), 1534 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 7fb10863f..a271f4c33 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -309,6 +309,8 @@ case object Crossref2Oaf { result } + + def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = { val a = new Author a.setName(given) @@ -370,10 +372,57 @@ case object Crossref2Oaf { case dataset: Dataset => convertDataset(dataset) } + + val doisReference:List[String] = for { + JObject(reference_json) <- json \ "reference" + JField("DOI", JString(doi_json)) <- reference_json + } yield doi_json + + + + if (doisReference!= null && doisReference.nonEmpty) { + val citation_relations:List[Relation] = generateCitationRelations(doisReference, result) + resultList = resultList ::: citation_relations + } resultList = resultList ::: List(result) resultList } + + + private def createCiteRelation(source:Result, targetPid:String, targetPidType:String) :List[Relation] = { + + + val targetId = 
IdentifierFactory.idFromPid("50",targetPidType, targetPid, true) + + val from = new Relation + from.setSource(source.getId) + from.setTarget(targetId) + from.setRelType(ModelConstants.RESULT_RESULT) + from.setRelClass(ModelConstants.CITES) + from.setSubRelType(ModelConstants.CITATION) + from.setCollectedfrom(source.getCollectedfrom) + from.setDataInfo(source.getDataInfo) + from.setLastupdatetimestamp(source.getLastupdatetimestamp) + + + val to = new Relation + to.setTarget(source.getId) + to.setSource(targetId) + to.setRelType(ModelConstants.RESULT_RESULT) + to.setRelClass(ModelConstants.IS_CITED_BY) + to.setSubRelType(ModelConstants.CITATION) + to.setCollectedfrom(source.getCollectedfrom) + to.setDataInfo(source.getDataInfo) + to.setLastupdatetimestamp(source.getLastupdatetimestamp) + + List(from,to) + } + + def generateCitationRelations(dois:List[String], result:Result):List[Relation] = { + dois.flatMap(d => createCiteRelation(result, d, "doi")) + } + def mappingFunderToRelations( funders: List[mappingFunder], sourceId: String, diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json index 788946fea..b66147b2d 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json @@ -1,1537 +1,1475 @@ { -"indexed": { -"date-parts": [ -[ -2021, -7, -2 -] -], -"date-time": "2021-07-02T07:30:10Z", -"timestamp": 1625211010708 -}, -"reference-count": 83, -"publisher": "Springer Science and Business Media LLC", -"issue": "5", -"license": [ -{ -"URL": "https://www.springer.com/tdm", -"start": { -"date-parts": [ -[ -2021, -2, -22 -] -], -"date-time": "2021-02-22T00:00:00Z", -"timestamp": 1613952000000 -}, -"delay-in-days": 0, 
-"content-version": "tdm" -}, -{ -"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", -"start": { -"date-parts": [ -[ -2021, -2, -22 -] -], -"date-time": "2021-02-22T00:00:00Z", -"timestamp": 1613952000000 -}, -"delay-in-days": 0, -"content-version": "vor" -} -], -"content-domain": { -"domain": [ -"link.springer.com" -], -"crossmark-restriction": false -}, -"short-container-title": [ -"Nat Astron" -], -"published-print": { -"date-parts": [ -[ -2021, -5 -] -] -}, -"DOI": "10.1038/s41550-020-01295-8", -"type": "journal-article", -"created": { -"date-parts": [ -[ -2021, -2, -22 -] -], -"date-time": "2021-02-22T17:03:42Z", -"timestamp": 1614013422000 -}, -"page": "510-518", -"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", -"source": "Crossref", -"is-referenced-by-count": 6, -"title": [ -"A tidal disruption event coincident with a high-energy neutrino" -], -"prefix": "10.1038", -"volume": "5", -"author": [ -{ -"ORCID": "http://orcid.org/0000-0003-2434-0387", -"authenticated-orcid": false, -"given": "Robert", -"family": "Stein", -"sequence": "first", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-3859-8074", -"authenticated-orcid": false, -"given": "Sjoert van", -"family": "Velzen", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0001-8594-8666", -"authenticated-orcid": false, -"given": "Marek", -"family": "Kowalski", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Anna", -"family": "Franckowiak", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-3703-5154", -"authenticated-orcid": false, -"given": "Suvi", -"family": "Gezari", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-3124-2814", -"authenticated-orcid": false, -"given": "James C. 
A.", -"family": "Miller-Jones", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Sara", -"family": "Frederick", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-0466-3779", -"authenticated-orcid": false, -"given": "Itai", -"family": "Sfaradi", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Michael F.", -"family": "Bietenholz", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-5936-1156", -"authenticated-orcid": false, -"given": "Assaf", -"family": "Horesh", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Rob", -"family": "Fender", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-2403-4582", -"authenticated-orcid": false, -"given": "Simone", -"family": "Garrappa", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-2184-6430", -"authenticated-orcid": false, -"given": "Tomás", -"family": "Ahumada", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Igor", -"family": "Andreoni", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Justin", -"family": "Belicki", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0001-8018-5348", -"authenticated-orcid": false, -"given": "Eric C.", -"family": "Bellm", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Markus", -"family": "Böttcher", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Valery", -"family": "Brinnel", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Rick", -"family": "Burruss", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-1673-970X", -"authenticated-orcid": false, -"given": "S. 
Bradley", -"family": "Cenko", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-8262-2924", -"authenticated-orcid": false, -"given": "Michael W.", -"family": "Coughlin", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-2292-0441", -"authenticated-orcid": false, -"given": "Virginia", -"family": "Cunningham", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Andrew", -"family": "Drake", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Glennys R.", -"family": "Farrar", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Michael", -"family": "Feeney", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Ryan J.", -"family": "Foley", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-3653-5598", -"authenticated-orcid": false, -"given": "Avishay", -"family": "Gal-Yam", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "V. 
Zach", -"family": "Golkhou", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-4163-4996", -"authenticated-orcid": false, -"given": "Ariel", -"family": "Goobar", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-3168-0139", -"authenticated-orcid": false, -"given": "Matthew J.", -"family": "Graham", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Erica", -"family": "Hammerstein", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-3367-3415", -"authenticated-orcid": false, -"given": "George", -"family": "Helou", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-9878-7889", -"authenticated-orcid": false, -"given": "Tiara", -"family": "Hung", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Mansi M.", -"family": "Kasliwal", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-5740-7747", -"authenticated-orcid": false, -"given": "Charles D.", -"family": "Kilpatrick", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-5105-344X", -"authenticated-orcid": false, -"given": "Albert K. 
H.", -"family": "Kong", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-6540-1484", -"authenticated-orcid": false, -"given": "Thomas", -"family": "Kupfer", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-2451-5482", -"authenticated-orcid": false, -"given": "Russ R.", -"family": "Laher", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-2242-0244", -"authenticated-orcid": false, -"given": "Ashish A.", -"family": "Mahabal", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-8532-9395", -"authenticated-orcid": false, -"given": "Frank J.", -"family": "Masci", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-0280-7484", -"authenticated-orcid": false, -"given": "Jannis", -"family": "Necker", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0001-8342-6274", -"authenticated-orcid": false, -"given": "Jakob", -"family": "Nordin", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Daniel A.", -"family": "Perley", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-8121-2560", -"authenticated-orcid": false, -"given": "Mickael", -"family": "Rigault", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-7788-628X", -"authenticated-orcid": false, -"given": "Simeon", -"family": "Reusch", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Hector", -"family": "Rodriguez", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0002-7559-315X", -"authenticated-orcid": false, -"given": "César", -"family": "Rojas-Bravo", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0001-7648-4142", -"authenticated-orcid": false, -"given": 
"Ben", -"family": "Rusholme", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-4401-0430", -"authenticated-orcid": false, -"given": "David L.", -"family": "Shupe", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0001-9898-5597", -"authenticated-orcid": false, -"given": "Leo P.", -"family": "Singer", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0003-1546-6615", -"authenticated-orcid": false, -"given": "Jesper", -"family": "Sollerman", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Maayane T.", -"family": "Soumagnac", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Daniel", -"family": "Stern", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Kirsty", -"family": "Taggart", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Jakob", -"family": "van Santen", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Charlotte", -"family": "Ward", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"given": "Patrick", -"family": "Woudt", -"sequence": "additional", -"affiliation": [ - -] -}, -{ -"ORCID": "http://orcid.org/0000-0001-6747-8509", -"authenticated-orcid": false, -"given": "Yuhan", -"family": "Yao", -"sequence": "additional", -"affiliation": [ - -] -} -], -"member": "297", -"published-online": { -"date-parts": [ -[ -2021, -2, -22 -] -] -}, -"reference": [ -{ -"key": "1295_CR1", -"doi-asserted-by": "crossref", -"first-page": "P03012", -"DOI": "10.1088/1748-0221/12/03/P03012", -"volume": "12", -"author": "MG Aartsen", -"year": "2017", -"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", -"journal-title": "J. Instrum." -}, -{ -"key": "1295_CR2", -"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. 
GCN Circ. 25913 (2019)." -}, -{ -"key": "1295_CR3", -"doi-asserted-by": "crossref", -"first-page": "018002", -"DOI": "10.1088/1538-3873/aaecbe", -"volume": "131", -"author": "EC Bellm", -"year": "2019", -"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR4", -"doi-asserted-by": "crossref", -"first-page": "533", -"DOI": "10.1016/j.astropartphys.2007.03.005", -"volume": "27", -"author": "M Kowalski", -"year": "2007", -"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", -"journal-title": "Astropart. Phys." -}, -{ -"key": "1295_CR5", -"doi-asserted-by": "crossref", -"first-page": "329", -"DOI": "10.1088/0004-637X/693/1/329", -"volume": "693", -"author": "GR Farrar", -"year": "2009", -"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR6", -"doi-asserted-by": "crossref", -"first-page": "1354", -"DOI": "10.1093/mnras/stx863", -"volume": "469", -"author": "L Dai", -"year": "2017", -"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR7", -"doi-asserted-by": "crossref", -"first-page": "114", -"DOI": "10.3847/1538-4357/ab44ca", -"volume": "886", -"author": "K Hayasaki", -"year": "2019", -"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR8", -"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. 
Preprint at https://arxiv.org/abs/1411.0704 (2014)." -}, -{ -"key": "1295_CR9", -"doi-asserted-by": "crossref", -"first-page": "3", -"DOI": "10.3847/1538-4357/aa6344", -"volume": "838", -"author": "N Senno", -"year": "2017", -"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR10", -"doi-asserted-by": "crossref", -"first-page": "083005", -"DOI": "10.1103/PhysRevD.93.083005", -"volume": "93", -"author": "XY Wang", -"year": "2016", -"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", -"journal-title": "Phys. Rev. D" -}, -{ -"key": "1295_CR11", -"doi-asserted-by": "crossref", -"first-page": "123001", -"DOI": "10.1103/PhysRevD.95.123001", -"volume": "95", -"author": "C Lunardini", -"year": "2017", -"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", -"journal-title": "Phys. Rev. D" -}, -{ -"key": "1295_CR12", -"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." -}, -{ -"key": "1295_CR13", -"doi-asserted-by": "crossref", -"first-page": "078001", -"DOI": "10.1088/1538-3873/ab006c", -"volume": "131", -"author": "MJ Graham", -"year": "2019", -"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR14", -"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." -}, -{ -"key": "1295_CR15", -"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 
12752 (2019)." -}, -{ -"key": "1295_CR16", -"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." -}, -{ -"key": "1295_CR17", -"doi-asserted-by": "crossref", -"first-page": "82", -"DOI": "10.3847/1538-4357/ab1844", -"volume": "878", -"author": "S van Velzen", -"year": "2019", -"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR18", -"doi-asserted-by": "crossref", -"first-page": "5655", -"DOI": "10.1093/mnras/staa192", -"volume": "492", -"author": "A Mummery", -"year": "2020", -"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR19", -"doi-asserted-by": "crossref", -"first-page": "184", -"DOI": "10.1088/0004-637X/764/2/184", -"volume": "764", -"author": "NJ McConnell", -"year": "2013", -"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR20", -"doi-asserted-by": "crossref", -"first-page": "149", -"DOI": "10.3847/1538-4357/aa633b", -"volume": "838", -"author": "K Auchettl", -"year": "2017", -"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", -"journal-title": "Astrophys. J." 
-}, -{ -"key": "1295_CR21", -"doi-asserted-by": "crossref", -"first-page": "4136", -"DOI": "10.1093/mnras/stz1602", -"volume": "487", -"author": "T Wevers", -"year": "2019", -"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR22", -"doi-asserted-by": "crossref", -"first-page": "198", -"DOI": "10.3847/1538-4357/aafe0c", -"volume": "872", -"author": "S van Velzen", -"year": "2019", -"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR23", -"doi-asserted-by": "crossref", -"first-page": "A81", -"DOI": "10.1051/0004-6361/201117855", -"volume": "538", -"author": "G Morlino", -"year": "2012", -"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", -"journal-title": "Astron. Astrophys." -}, -{ -"key": "1295_CR24", -"doi-asserted-by": "crossref", -"first-page": "86", -"DOI": "10.3847/1538-4357/aaa8e0", -"volume": "854", -"author": "T Eftekhari", -"year": "2018", -"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR25", -"doi-asserted-by": "crossref", -"first-page": "1258", -"DOI": "10.1093/mnras/stt1645", -"volume": "436", -"author": "A Horesh", -"year": "2013", -"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. 
Soc. 436, 1258–1267 (2013).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR26", -"doi-asserted-by": "crossref", -"first-page": "78", -"DOI": "10.1088/0004-637X/772/1/78", -"volume": "772", -"author": "R Barniol Duran", -"year": "2013", -"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR27", -"doi-asserted-by": "crossref", -"first-page": "69", -"DOI": "10.1071/AS02053", -"volume": "20", -"author": "AG Polatidis", -"year": "2003", -"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", -"journal-title": "Publ. Astron. Soc. Aust." -}, -{ -"key": "1295_CR28", -"doi-asserted-by": "crossref", -"first-page": "L25", -"DOI": "10.3847/2041-8205/819/2/L25", -"volume": "819", -"author": "KD Alexander", -"year": "2016", -"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", -"journal-title": "Astrophys. J. Lett." -}, -{ -"key": "1295_CR29", -"doi-asserted-by": "crossref", -"first-page": "127", -"DOI": "10.3847/0004-637X/827/2/127", -"volume": "827", -"author": "J Krolik", -"year": "2016", -"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR30", -"doi-asserted-by": "crossref", -"first-page": "1", -"DOI": "10.3847/1538-4357/aab361", -"volume": "856", -"author": "DR Pasham", -"year": "2018", -"unstructured": "Pasham, D. R. & van Velzen, S. 
Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR31", -"doi-asserted-by": "crossref", -"first-page": "L9", -"DOI": "10.1051/0004-6361/201834750", -"volume": "622", -"author": "NL Strotjohann", -"year": "2019", -"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", -"journal-title": "Astron. Astrophys." -}, -{ -"key": "1295_CR32", -"doi-asserted-by": "crossref", -"first-page": "425", -"DOI": "10.1146/annurev.aa.22.090184.002233", -"volume": "22", -"author": "AM Hillas", -"year": "1984", -"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", -"journal-title": "Annu. Rev. Astron. Astrophys." -}, -{ -"key": "1295_CR33", -"doi-asserted-by": "crossref", -"first-page": "eaat1378", -"DOI": "10.1126/science.aat1378", -"volume": "361", -"author": "IceCube Collaboration", -"year": "2018", -"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", -"journal-title": "Science" -}, -{ -"key": "1295_CR34", -"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." -}, -{ -"key": "1295_CR35", -"doi-asserted-by": "crossref", -"first-page": "071101", -"DOI": "10.1103/PhysRevLett.116.071101", -"volume": "116", -"author": "K Murase", -"year": "2016", -"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", -"journal-title": "Phys. Rev. Lett." 
-}, -{ -"key": "1295_CR36", -"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", -"DOI": "10.22323/1.358.1016", -"doi-asserted-by": "crossref" -}, -{ -"key": "1295_CR37", -"doi-asserted-by": "crossref", -"first-page": "048001", -"DOI": "10.1088/1538-3873/aaff99", -"volume": "131", -"author": "MW Coughlin", -"year": "2019", -"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR38", -"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." -}, -{ -"key": "1295_CR39", -"doi-asserted-by": "crossref", -"first-page": "018003", -"DOI": "10.1088/1538-3873/aae8ac", -"volume": "131", -"author": "FJ Masci", -"year": "2019", -"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR40", -"doi-asserted-by": "crossref", -"first-page": "018001", -"DOI": "10.1088/1538-3873/aae904", -"volume": "131", -"author": "MT Patterson", -"year": "2019", -"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR41", -"unstructured": "Stein, R. & Reusch, S. 
robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", -"DOI": "10.5281/zenodo.4048336", -"doi-asserted-by": "publisher" -}, -{ -"key": "1295_CR42", -"doi-asserted-by": "crossref", -"first-page": "A147", -"DOI": "10.1051/0004-6361/201935634", -"volume": "631", -"author": "J Nordin", -"year": "2019", -"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", -"journal-title": "Astron. Astrophys." -}, -{ -"key": "1295_CR43", -"doi-asserted-by": "crossref", -"first-page": "038002", -"DOI": "10.1088/1538-3873/aaf3fa", -"volume": "131", -"author": "A Mahabal", -"year": "2019", -"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR44", -"doi-asserted-by": "crossref", -"first-page": "075002", -"DOI": "10.1088/1538-3873/aac410", -"volume": "130", -"author": "MT Soumagnac", -"year": "2018", -"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR45", -"doi-asserted-by": "crossref", -"first-page": "A1", -"DOI": "10.1051/0004-6361/201833051", -"volume": "616", -"author": "Gaia Collaboration", -"year": "2018", -"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", -"journal-title": "Astron. Astrophys." -}, -{ -"key": "1295_CR46", -"doi-asserted-by": "crossref", -"first-page": "128001", -"DOI": "10.1088/1538-3873/aae3d9", -"volume": "130", -"author": "Y Tachibana", -"year": "2018", -"unstructured": "Tachibana, Y. & Miller, A. A. 
A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR47", -"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." -}, -{ -"key": "1295_CR48", -"doi-asserted-by": "crossref", -"first-page": "1868", -"DOI": "10.1088/0004-6256/140/6/1868", -"volume": "140", -"author": "EL Wright", -"year": "2010", -"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", -"journal-title": "Astron. J." -}, -{ -"key": "1295_CR49", -"doi-asserted-by": "crossref", -"first-page": "051103", -"DOI": "10.1103/PhysRevLett.124.051103", -"volume": "124", -"author": "MG Aartsen", -"year": "2020", -"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", -"journal-title": "Phys. Rev. Lett." -}, -{ -"key": "1295_CR50", -"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", -"DOI": "10.1117/12.551456", -"doi-asserted-by": "publisher" -}, -{ -"key": "1295_CR51", -"doi-asserted-by": "crossref", -"first-page": "035003", -"DOI": "10.1088/1538-3873/aaa53f", -"volume": "130", -"author": "N Blagorodnova", -"year": "2018", -"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR52", -"doi-asserted-by": "crossref", -"first-page": "A115", -"DOI": "10.1051/0004-6361/201935344", -"volume": "627", -"author": "M Rigault", -"year": "2019", -"unstructured": "Rigault, M. et al. 
Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", -"journal-title": "Astron. Astrophys." -}, -{ -"key": "1295_CR53", -"doi-asserted-by": "crossref", -"first-page": "A68", -"DOI": "10.1051/0004-6361/201628275", -"volume": "593", -"author": "C Fremling", -"year": "2016", -"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", -"journal-title": "Astron. Astrophys." -}, -{ -"key": "1295_CR54", -"doi-asserted-by": "crossref", -"first-page": "72", -"DOI": "10.3847/1538-4357/aa998e", -"volume": "852", -"author": "S van Velzen", -"year": "2018", -"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR55", -"doi-asserted-by": "crossref", -"first-page": "95", -"DOI": "10.1007/s11214-005-5095-4", -"volume": "120", -"author": "PWA Roming", -"year": "2005", -"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", -"journal-title": "Space Sci. Rev." -}, -{ -"key": "1295_CR56", -"doi-asserted-by": "crossref", -"first-page": "1005", -"DOI": "10.1086/422091", -"volume": "611", -"author": "N Gehrels", -"year": "2004", -"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR57", -"doi-asserted-by": "crossref", -"first-page": "19", -"DOI": "10.3847/0004-637X/829/1/19", -"volume": "829", -"author": "S van Velzen", -"year": "2016", -"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 
829, 19 (2016).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR58", -"doi-asserted-by": "crossref", -"first-page": "575", -"DOI": "10.1093/mnras/stw307", -"volume": "458", -"author": "W Lu", -"year": "2016", -"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR59", -"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." -}, -{ -"key": "1295_CR60", -"doi-asserted-by": "crossref", -"first-page": "375", -"DOI": "10.1086/133562", -"volume": "107", -"author": "JB Oke", -"year": "1995", -"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR61", -"doi-asserted-by": "crossref", -"first-page": "765", -"DOI": "10.1111/j.1365-2966.2005.08957.x", -"volume": "359", -"author": "A Garcia-Rissmann", -"year": "2005", -"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR62", -"doi-asserted-by": "crossref", -"first-page": "165", -"DOI": "10.1007/s11214-005-5097-2", -"volume": "120", -"author": "DN Burrows", -"year": "2005", -"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", -"journal-title": "Space Sci. Rev." -}, -{ -"key": "1295_CR63", -"doi-asserted-by": "crossref", -"first-page": "L1", -"DOI": "10.1051/0004-6361:20000036", -"volume": "365", -"author": "F Jansen", -"year": "2001", -"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 
365, L1–L6 (2001).", -"journal-title": "Astron. Astrophys." -}, -{ -"key": "1295_CR64", -"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", -"DOI": "10.1051/0004-6361/201629178", -"doi-asserted-by": "crossref" -}, -{ -"key": "1295_CR65", -"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." -}, -{ -"key": "1295_CR66", -"doi-asserted-by": "crossref", -"first-page": "1545", -"DOI": "10.1111/j.1365-2966.2008.13953.x", -"volume": "391", -"author": "JTL Zwart", -"year": "2008", -"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR67", -"doi-asserted-by": "crossref", -"first-page": "5677", -"DOI": "10.1093/mnras/sty074", -"volume": "475", -"author": "J Hickish", -"year": "2018", -"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR68", -"doi-asserted-by": "crossref", -"first-page": "1396", -"DOI": "10.1093/mnras/stv1728", -"volume": "453", -"author": "YC Perrott", -"year": "2015", -"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", -"journal-title": "Mon. Not. R. Astron. Soc." -}, -{ -"key": "1295_CR69", -"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." 
-}, -{ -"key": "1295_CR70", -"doi-asserted-by": "crossref", -"first-page": "1071", -"DOI": "10.1088/0004-637X/697/2/1071", -"volume": "697", -"author": "WB Atwood", -"year": "2009", -"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR71", -"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", -"DOI": "10.22323/1.301.0824", -"doi-asserted-by": "crossref" -}, -{ -"key": "1295_CR72", -"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." -}, -{ -"key": "1295_CR73", -"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." -}, -{ -"key": "1295_CR74", -"doi-asserted-by": "crossref", -"first-page": "14", -"DOI": "10.1088/0004-637X/767/1/14", -"volume": "767", -"author": "T Pursimo", -"year": "2013", -"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", -"journal-title": "Astrophys. J." -}, -{ -"key": "1295_CR75", -"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." -}, -{ -"key": "1295_CR76", -"doi-asserted-by": "crossref", -"first-page": "133", -"DOI": "10.1088/0004-637X/802/2/133", -"volume": "802", -"author": "C Diltz", -"year": "2015", -"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", -"journal-title": "Astrophys. J." 
-}, -{ -"key": "1295_CR77", -"doi-asserted-by": "crossref", -"first-page": "88", -"DOI": "10.1038/s41550-018-0610-1", -"volume": "3", -"author": "S Gao", -"year": "2019", -"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", -"journal-title": "Nat. Astron." -}, -{ -"key": "1295_CR78", -"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." -}, -{ -"key": "1295_CR79", -"doi-asserted-by": "crossref", -"first-page": "62", -"DOI": "10.1126/science.aad1182", -"volume": "351", -"author": "S van Velzen", -"year": "2016", -"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", -"journal-title": "Science" -}, -{ -"key": "1295_CR80", -"doi-asserted-by": "crossref", -"first-page": "306", -"DOI": "10.1086/670067", -"volume": "125", -"author": "D Foreman-Mackey", -"year": "2013", -"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", -"journal-title": "Publ. Astron. Soc. Pac." -}, -{ -"key": "1295_CR81", -"doi-asserted-by": "crossref", -"first-page": "6", -"DOI": "10.3847/1538-4365/aab761", -"volume": "236", -"author": "J Guillochon", -"year": "2018", -"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", -"journal-title": "Astrophys. J. Suppl. Ser." -}, -{ -"key": "1295_CR82", -"doi-asserted-by": "crossref", -"first-page": "e008", -"DOI": "10.1017/pasa.2013.44", -"volume": "31", -"author": "J Granot", -"year": "2014", -"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", -"journal-title": "Publ. Astron. Soc. Aust." 
-}, -{ -"key": "1295_CR83", -"doi-asserted-by": "crossref", -"first-page": "102", -"DOI": "10.1088/0004-637X/815/2/102", -"volume": "815", -"author": "W Fong", -"year": "2015", -"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", -"journal-title": "Astrophys. J." -} -], -"container-title": [ -"Nature Astronomy" -], -"original-title": [ - -], -"language": "en", -"link": [ -{ -"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", -"content-type": "application/pdf", -"content-version": "vor", -"intended-application": "text-mining" -}, -{ -"URL": "http://www.nature.com/articles/s41550-020-01295-8", -"content-type": "text/html", -"content-version": "vor", -"intended-application": "text-mining" -}, -{ -"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", -"content-type": "application/pdf", -"content-version": "vor", -"intended-application": "similarity-checking" -} -], -"deposited": { -"date-parts": [ -[ -2021, -5, -17 -] -], -"date-time": "2021-05-17T15:08:12Z", -"timestamp": 1621264092000 -}, -"score": 1.0, -"subtitle": [ - -], -"short-title": [ - -], -"issued": { -"date-parts": [ -[ -3021, -2, -22 -] -] -}, -"references-count": 83, -"journal-issue": { -"published-print": { -"date-parts": [ -[ -2021, -5 -] -] -}, -"issue": "5" -}, -"alternative-id": [ -"1295" -], -"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", -"relation": { -"cites": [ - -] -}, -"ISSN": [ -"2397-3366" -], -"issn-type": [ -{ -"value": "2397-3366", -"type": "electronic" -} -], -"assertion": [ -{ -"value": "21 July 2020", -"order": 1, -"name": "received", -"label": "Received", -"group": { -"name": "ArticleHistory", -"label": "Article History" -} -}, -{ -"value": "16 December 2020", -"order": 2, -"name": "accepted", -"label": "Accepted", -"group": { -"name": "ArticleHistory", -"label": "Article History" 
-} -}, -{ -"value": "22 February 2021", -"order": 3, -"name": "first_online", -"label": "First Online", -"group": { -"name": "ArticleHistory", -"label": "Article History" -} -}, -{ -"value": "The authors declare no competing interests.", -"order": 1, -"name": "Ethics", -"group": { -"name": "EthicsHeading", -"label": "Competing interests" -} -} -] + "indexed": { + "date-parts": [ + [ + 2021, + 7, + 2 + ] + ], + "date-time": "2021-07-02T07:30:10Z", + "timestamp": 1625211010708 + }, + "reference-count": 83, + "publisher": "Springer Science and Business Media LLC", + "issue": "5", + "license": [ + { + "URL": "https://www.springer.com/tdm", + "start": { + "date-parts": [ + [ + 2021, + 2, + 22 + ] + ], + "date-time": "2021-02-22T00:00:00Z", + "timestamp": 1613952000000 + }, + "delay-in-days": 0, + "content-version": "tdm" + }, + { + "URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", + "start": { + "date-parts": [ + [ + 2021, + 2, + 22 + ] + ], + "date-time": "2021-02-22T00:00:00Z", + "timestamp": 1613952000000 + }, + "delay-in-days": 0, + "content-version": "vor" + } + ], + "content-domain": { + "domain": [ + "link.springer.com" + ], + "crossmark-restriction": false + }, + "short-container-title": [ + "Nat Astron" + ], + "published-print": { + "date-parts": [ + [ + 2021, + 5 + ] + ] + }, + "DOI": "10.1038/s41550-020-01295-8", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2021, + 2, + 22 + ] + ], + "date-time": "2021-02-22T17:03:42Z", + "timestamp": 1614013422000 + }, + "page": "510-518", + "update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", + "source": "Crossref", + "is-referenced-by-count": 6, + "title": [ + "A tidal disruption event coincident with a high-energy neutrino" + ], + "prefix": "10.1038", + "volume": "5", + "author": [ + { + "ORCID": "http://orcid.org/0000-0003-2434-0387", + "authenticated-orcid": false, + "given": "Robert", + "family": "Stein", + 
"sequence": "first", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-3859-8074", + "authenticated-orcid": false, + "given": "Sjoert van", + "family": "Velzen", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0001-8594-8666", + "authenticated-orcid": false, + "given": "Marek", + "family": "Kowalski", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Anna", + "family": "Franckowiak", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-3703-5154", + "authenticated-orcid": false, + "given": "Suvi", + "family": "Gezari", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-3124-2814", + "authenticated-orcid": false, + "given": "James C. A.", + "family": "Miller-Jones", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Sara", + "family": "Frederick", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-0466-3779", + "authenticated-orcid": false, + "given": "Itai", + "family": "Sfaradi", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Michael F.", + "family": "Bietenholz", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-5936-1156", + "authenticated-orcid": false, + "given": "Assaf", + "family": "Horesh", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Rob", + "family": "Fender", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-2403-4582", + "authenticated-orcid": false, + "given": "Simone", + "family": "Garrappa", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-2184-6430", + "authenticated-orcid": false, + "given": "Tomás", + "family": "Ahumada", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Igor", + 
"family": "Andreoni", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Justin", + "family": "Belicki", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0001-8018-5348", + "authenticated-orcid": false, + "given": "Eric C.", + "family": "Bellm", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Markus", + "family": "Böttcher", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Valery", + "family": "Brinnel", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Rick", + "family": "Burruss", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-1673-970X", + "authenticated-orcid": false, + "given": "S. Bradley", + "family": "Cenko", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-8262-2924", + "authenticated-orcid": false, + "given": "Michael W.", + "family": "Coughlin", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-2292-0441", + "authenticated-orcid": false, + "given": "Virginia", + "family": "Cunningham", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Andrew", + "family": "Drake", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Glennys R.", + "family": "Farrar", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Michael", + "family": "Feeney", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Ryan J.", + "family": "Foley", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-3653-5598", + "authenticated-orcid": false, + "given": "Avishay", + "family": "Gal-Yam", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "V. 
Zach", + "family": "Golkhou", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-4163-4996", + "authenticated-orcid": false, + "given": "Ariel", + "family": "Goobar", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-3168-0139", + "authenticated-orcid": false, + "given": "Matthew J.", + "family": "Graham", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Erica", + "family": "Hammerstein", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-3367-3415", + "authenticated-orcid": false, + "given": "George", + "family": "Helou", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-9878-7889", + "authenticated-orcid": false, + "given": "Tiara", + "family": "Hung", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Mansi M.", + "family": "Kasliwal", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-5740-7747", + "authenticated-orcid": false, + "given": "Charles D.", + "family": "Kilpatrick", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-5105-344X", + "authenticated-orcid": false, + "given": "Albert K. 
H.", + "family": "Kong", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-6540-1484", + "authenticated-orcid": false, + "given": "Thomas", + "family": "Kupfer", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-2451-5482", + "authenticated-orcid": false, + "given": "Russ R.", + "family": "Laher", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-2242-0244", + "authenticated-orcid": false, + "given": "Ashish A.", + "family": "Mahabal", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-8532-9395", + "authenticated-orcid": false, + "given": "Frank J.", + "family": "Masci", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-0280-7484", + "authenticated-orcid": false, + "given": "Jannis", + "family": "Necker", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0001-8342-6274", + "authenticated-orcid": false, + "given": "Jakob", + "family": "Nordin", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Daniel A.", + "family": "Perley", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-8121-2560", + "authenticated-orcid": false, + "given": "Mickael", + "family": "Rigault", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-7788-628X", + "authenticated-orcid": false, + "given": "Simeon", + "family": "Reusch", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Hector", + "family": "Rodriguez", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0002-7559-315X", + "authenticated-orcid": false, + "given": "César", + "family": "Rojas-Bravo", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": 
"http://orcid.org/0000-0001-7648-4142", + "authenticated-orcid": false, + "given": "Ben", + "family": "Rusholme", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-4401-0430", + "authenticated-orcid": false, + "given": "David L.", + "family": "Shupe", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0001-9898-5597", + "authenticated-orcid": false, + "given": "Leo P.", + "family": "Singer", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0003-1546-6615", + "authenticated-orcid": false, + "given": "Jesper", + "family": "Sollerman", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Maayane T.", + "family": "Soumagnac", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Daniel", + "family": "Stern", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Kirsty", + "family": "Taggart", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Jakob", + "family": "van Santen", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Charlotte", + "family": "Ward", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "given": "Patrick", + "family": "Woudt", + "sequence": "additional", + "affiliation": [ + ] + }, + { + "ORCID": "http://orcid.org/0000-0001-6747-8509", + "authenticated-orcid": false, + "given": "Yuhan", + "family": "Yao", + "sequence": "additional", + "affiliation": [ + ] + } + ], + "member": "297", + "published-online": { + "date-parts": [ + [ + 2021, + 2, + 22 + ] + ] + }, + "reference": [ + { + "key": "1295_CR1", + "doi-asserted-by": "crossref", + "first-page": "P03012", + "DOI": "10.1088/1748-0221/12/03/P03012", + "volume": "12", + "author": "MG Aartsen", + "year": "2017", + "unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 
12, P03012 (2017).", + "journal-title": "J. Instrum." + }, + { + "key": "1295_CR2", + "unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." + }, + { + "key": "1295_CR3", + "doi-asserted-by": "crossref", + "first-page": "018002", + "DOI": "10.1088/1538-3873/aaecbe", + "volume": "131", + "author": "EC Bellm", + "year": "2019", + "unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR4", + "doi-asserted-by": "crossref", + "first-page": "533", + "DOI": "10.1016/j.astropartphys.2007.03.005", + "volume": "27", + "author": "M Kowalski", + "year": "2007", + "unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", + "journal-title": "Astropart. Phys." + }, + { + "key": "1295_CR5", + "doi-asserted-by": "crossref", + "first-page": "329", + "DOI": "10.1088/0004-637X/693/1/329", + "volume": "693", + "author": "GR Farrar", + "year": "2009", + "unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR6", + "doi-asserted-by": "crossref", + "first-page": "1354", + "DOI": "10.1093/mnras/stx863", + "volume": "469", + "author": "L Dai", + "year": "2017", + "unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR7", + "doi-asserted-by": "crossref", + "first-page": "114", + "DOI": "10.3847/1538-4357/ab44ca", + "volume": "886", + "author": "K Hayasaki", + "year": "2019", + "unstructured": "Hayasaki, K. & Yamazaki, R. 
Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR8", + "unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." + }, + { + "key": "1295_CR9", + "doi-asserted-by": "crossref", + "first-page": "3", + "DOI": "10.3847/1538-4357/aa6344", + "volume": "838", + "author": "N Senno", + "year": "2017", + "unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR10", + "doi-asserted-by": "crossref", + "first-page": "083005", + "DOI": "10.1103/PhysRevD.93.083005", + "volume": "93", + "author": "XY Wang", + "year": "2016", + "unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", + "journal-title": "Phys. Rev. D" + }, + { + "key": "1295_CR11", + "doi-asserted-by": "crossref", + "first-page": "123001", + "DOI": "10.1103/PhysRevD.95.123001", + "volume": "95", + "author": "C Lunardini", + "year": "2017", + "unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", + "journal-title": "Phys. Rev. D" + }, + { + "key": "1295_CR12", + "unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." + }, + { + "key": "1295_CR13", + "doi-asserted-by": "crossref", + "first-page": "078001", + "DOI": "10.1088/1538-3873/ab006c", + "volume": "131", + "author": "MJ Graham", + "year": "2019", + "unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 
131, 078001 (2019).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR14", + "unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." + }, + { + "key": "1295_CR15", + "unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." + }, + { + "key": "1295_CR16", + "unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." + }, + { + "key": "1295_CR17", + "doi-asserted-by": "crossref", + "first-page": "82", + "DOI": "10.3847/1538-4357/ab1844", + "volume": "878", + "author": "S van Velzen", + "year": "2019", + "unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR18", + "doi-asserted-by": "crossref", + "first-page": "5655", + "DOI": "10.1093/mnras/staa192", + "volume": "492", + "author": "A Mummery", + "year": "2020", + "unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR19", + "doi-asserted-by": "crossref", + "first-page": "184", + "DOI": "10.1088/0004-637X/764/2/184", + "volume": "764", + "author": "NJ McConnell", + "year": "2013", + "unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR20", + "doi-asserted-by": "crossref", + "first-page": "149", + "DOI": "10.3847/1538-4357/aa633b", + "volume": "838", + "author": "K Auchettl", + "year": "2017", + "unstructured": "Auchettl, K., Guillochon, J. 
& Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR21", + "doi-asserted-by": "crossref", + "first-page": "4136", + "DOI": "10.1093/mnras/stz1602", + "volume": "487", + "author": "T Wevers", + "year": "2019", + "unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR22", + "doi-asserted-by": "crossref", + "first-page": "198", + "DOI": "10.3847/1538-4357/aafe0c", + "volume": "872", + "author": "S van Velzen", + "year": "2019", + "unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR23", + "doi-asserted-by": "crossref", + "first-page": "A81", + "DOI": "10.1051/0004-6361/201117855", + "volume": "538", + "author": "G Morlino", + "year": "2012", + "unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", + "journal-title": "Astron. Astrophys." + }, + { + "key": "1295_CR24", + "doi-asserted-by": "crossref", + "first-page": "86", + "DOI": "10.3847/1538-4357/aaa8e0", + "volume": "854", + "author": "T Eftekhari", + "year": "2018", + "unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", + "journal-title": "Astrophys. J." 
+ }, + { + "key": "1295_CR25", + "doi-asserted-by": "crossref", + "first-page": "1258", + "DOI": "10.1093/mnras/stt1645", + "volume": "436", + "author": "A Horesh", + "year": "2013", + "unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR26", + "doi-asserted-by": "crossref", + "first-page": "78", + "DOI": "10.1088/0004-637X/772/1/78", + "volume": "772", + "author": "R Barniol Duran", + "year": "2013", + "unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR27", + "doi-asserted-by": "crossref", + "first-page": "69", + "DOI": "10.1071/AS02053", + "volume": "20", + "author": "AG Polatidis", + "year": "2003", + "unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", + "journal-title": "Publ. Astron. Soc. Aust." + }, + { + "key": "1295_CR28", + "doi-asserted-by": "crossref", + "first-page": "L25", + "DOI": "10.3847/2041-8205/819/2/L25", + "volume": "819", + "author": "KD Alexander", + "year": "2016", + "unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", + "journal-title": "Astrophys. J. Lett." + }, + { + "key": "1295_CR29", + "doi-asserted-by": "crossref", + "first-page": "127", + "DOI": "10.3847/0004-637X/827/2/127", + "volume": "827", + "author": "J Krolik", + "year": "2016", + "unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. 
ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR30", + "doi-asserted-by": "crossref", + "first-page": "1", + "DOI": "10.3847/1538-4357/aab361", + "volume": "856", + "author": "DR Pasham", + "year": "2018", + "unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR31", + "doi-asserted-by": "crossref", + "first-page": "L9", + "DOI": "10.1051/0004-6361/201834750", + "volume": "622", + "author": "NL Strotjohann", + "year": "2019", + "unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", + "journal-title": "Astron. Astrophys." + }, + { + "key": "1295_CR32", + "doi-asserted-by": "crossref", + "first-page": "425", + "DOI": "10.1146/annurev.aa.22.090184.002233", + "volume": "22", + "author": "AM Hillas", + "year": "1984", + "unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", + "journal-title": "Annu. Rev. Astron. Astrophys." + }, + { + "key": "1295_CR33", + "doi-asserted-by": "crossref", + "first-page": "eaat1378", + "DOI": "10.1126/science.aat1378", + "volume": "361", + "author": "IceCube Collaboration", + "year": "2018", + "unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", + "journal-title": "Science" + }, + { + "key": "1295_CR34", + "unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." 
+ }, + { + "key": "1295_CR35", + "doi-asserted-by": "crossref", + "first-page": "071101", + "DOI": "10.1103/PhysRevLett.116.071101", + "volume": "116", + "author": "K Murase", + "year": "2016", + "unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", + "journal-title": "Phys. Rev. Lett." + }, + { + "key": "1295_CR36", + "unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", + "DOI": "10.22323/1.358.1016", + "doi-asserted-by": "crossref" + }, + { + "key": "1295_CR37", + "doi-asserted-by": "crossref", + "first-page": "048001", + "DOI": "10.1088/1538-3873/aaff99", + "volume": "131", + "author": "MW Coughlin", + "year": "2019", + "unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR38", + "unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." + }, + { + "key": "1295_CR39", + "doi-asserted-by": "crossref", + "first-page": "018003", + "DOI": "10.1088/1538-3873/aae8ac", + "volume": "131", + "author": "FJ Masci", + "year": "2019", + "unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR40", + "doi-asserted-by": "crossref", + "first-page": "018001", + "DOI": "10.1088/1538-3873/aae904", + "volume": "131", + "author": "MT Patterson", + "year": "2019", + "unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 
131, 018001 (2019).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR41", + "unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", + "DOI": "10.5281/zenodo.4048336", + "doi-asserted-by": "publisher" + }, + { + "key": "1295_CR42", + "doi-asserted-by": "crossref", + "first-page": "A147", + "DOI": "10.1051/0004-6361/201935634", + "volume": "631", + "author": "J Nordin", + "year": "2019", + "unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", + "journal-title": "Astron. Astrophys." + }, + { + "key": "1295_CR43", + "doi-asserted-by": "crossref", + "first-page": "038002", + "DOI": "10.1088/1538-3873/aaf3fa", + "volume": "131", + "author": "A Mahabal", + "year": "2019", + "unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR44", + "doi-asserted-by": "crossref", + "first-page": "075002", + "DOI": "10.1088/1538-3873/aac410", + "volume": "130", + "author": "MT Soumagnac", + "year": "2018", + "unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR45", + "doi-asserted-by": "crossref", + "first-page": "A1", + "DOI": "10.1051/0004-6361/201833051", + "volume": "616", + "author": "Gaia Collaboration", + "year": "2018", + "unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", + "journal-title": "Astron. Astrophys." 
+ }, + { + "key": "1295_CR46", + "doi-asserted-by": "crossref", + "first-page": "128001", + "DOI": "10.1088/1538-3873/aae3d9", + "volume": "130", + "author": "Y Tachibana", + "year": "2018", + "unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR47", + "unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." + }, + { + "key": "1295_CR48", + "doi-asserted-by": "crossref", + "first-page": "1868", + "DOI": "10.1088/0004-6256/140/6/1868", + "volume": "140", + "author": "EL Wright", + "year": "2010", + "unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", + "journal-title": "Astron. J." + }, + { + "key": "1295_CR49", + "doi-asserted-by": "crossref", + "first-page": "051103", + "DOI": "10.1103/PhysRevLett.124.051103", + "volume": "124", + "author": "MG Aartsen", + "year": "2020", + "unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", + "journal-title": "Phys. Rev. Lett." + }, + { + "key": "1295_CR50", + "unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", + "DOI": "10.1117/12.551456", + "doi-asserted-by": "publisher" + }, + { + "key": "1295_CR51", + "doi-asserted-by": "crossref", + "first-page": "035003", + "DOI": "10.1088/1538-3873/aaa53f", + "volume": "130", + "author": "N Blagorodnova", + "year": "2018", + "unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 
130, 035003 (2018).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR52", + "doi-asserted-by": "crossref", + "first-page": "A115", + "DOI": "10.1051/0004-6361/201935344", + "volume": "627", + "author": "M Rigault", + "year": "2019", + "unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", + "journal-title": "Astron. Astrophys." + }, + { + "key": "1295_CR53", + "doi-asserted-by": "crossref", + "first-page": "A68", + "DOI": "10.1051/0004-6361/201628275", + "volume": "593", + "author": "C Fremling", + "year": "2016", + "unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", + "journal-title": "Astron. Astrophys." + }, + { + "key": "1295_CR54", + "doi-asserted-by": "crossref", + "first-page": "72", + "DOI": "10.3847/1538-4357/aa998e", + "volume": "852", + "author": "S van Velzen", + "year": "2018", + "unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR55", + "doi-asserted-by": "crossref", + "first-page": "95", + "DOI": "10.1007/s11214-005-5095-4", + "volume": "120", + "author": "PWA Roming", + "year": "2005", + "unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", + "journal-title": "Space Sci. Rev." + }, + { + "key": "1295_CR56", + "doi-asserted-by": "crossref", + "first-page": "1005", + "DOI": "10.1086/422091", + "volume": "611", + "author": "N Gehrels", + "year": "2004", + "unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", + "journal-title": "Astrophys. J." 
+ }, + { + "key": "1295_CR57", + "doi-asserted-by": "crossref", + "first-page": "19", + "DOI": "10.3847/0004-637X/829/1/19", + "volume": "829", + "author": "S van Velzen", + "year": "2016", + "unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR58", + "doi-asserted-by": "crossref", + "first-page": "575", + "DOI": "10.1093/mnras/stw307", + "volume": "458", + "author": "W Lu", + "year": "2016", + "unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR59", + "unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." + }, + { + "key": "1295_CR60", + "doi-asserted-by": "crossref", + "first-page": "375", + "DOI": "10.1086/133562", + "volume": "107", + "author": "JB Oke", + "year": "1995", + "unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR61", + "doi-asserted-by": "crossref", + "first-page": "765", + "DOI": "10.1111/j.1365-2966.2005.08957.x", + "volume": "359", + "author": "A Garcia-Rissmann", + "year": "2005", + "unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR62", + "doi-asserted-by": "crossref", + "first-page": "165", + "DOI": "10.1007/s11214-005-5097-2", + "volume": "120", + "author": "DN Burrows", + "year": "2005", + "unstructured": "Burrows, D. N. et al. 
The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", + "journal-title": "Space Sci. Rev." + }, + { + "key": "1295_CR63", + "doi-asserted-by": "crossref", + "first-page": "L1", + "DOI": "10.1051/0004-6361:20000036", + "volume": "365", + "author": "F Jansen", + "year": "2001", + "unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", + "journal-title": "Astron. Astrophys." + }, + { + "key": "1295_CR64", + "unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", + "DOI": "10.1051/0004-6361/201629178", + "doi-asserted-by": "crossref" + }, + { + "key": "1295_CR65", + "unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." + }, + { + "key": "1295_CR66", + "doi-asserted-by": "crossref", + "first-page": "1545", + "DOI": "10.1111/j.1365-2966.2008.13953.x", + "volume": "391", + "author": "JTL Zwart", + "year": "2008", + "unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR67", + "doi-asserted-by": "crossref", + "first-page": "5677", + "DOI": "10.1093/mnras/sty074", + "volume": "475", + "author": "J Hickish", + "year": "2018", + "unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR68", + "doi-asserted-by": "crossref", + "first-page": "1396", + "DOI": "10.1093/mnras/stv1728", + "volume": "453", + "author": "YC Perrott", + "year": "2015", + "unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. 
Not. R. Astron. Soc. 453, 1396–1403 (2015).", + "journal-title": "Mon. Not. R. Astron. Soc." + }, + { + "key": "1295_CR69", + "unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." + }, + { + "key": "1295_CR70", + "doi-asserted-by": "crossref", + "first-page": "1071", + "DOI": "10.1088/0004-637X/697/2/1071", + "volume": "697", + "author": "WB Atwood", + "year": "2009", + "unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR71", + "unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", + "DOI": "10.22323/1.301.0824", + "doi-asserted-by": "crossref" + }, + { + "key": "1295_CR72", + "unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." + }, + { + "key": "1295_CR73", + "unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." + }, + { + "key": "1295_CR74", + "doi-asserted-by": "crossref", + "first-page": "14", + "DOI": "10.1088/0004-637X/767/1/14", + "volume": "767", + "author": "T Pursimo", + "year": "2013", + "unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR75", + "unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." 
+ }, + { + "key": "1295_CR76", + "doi-asserted-by": "crossref", + "first-page": "133", + "DOI": "10.1088/0004-637X/802/2/133", + "volume": "802", + "author": "C Diltz", + "year": "2015", + "unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", + "journal-title": "Astrophys. J." + }, + { + "key": "1295_CR77", + "doi-asserted-by": "crossref", + "first-page": "88", + "DOI": "10.1038/s41550-018-0610-1", + "volume": "3", + "author": "S Gao", + "year": "2019", + "unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", + "journal-title": "Nat. Astron." + }, + { + "key": "1295_CR78", + "unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." + }, + { + "key": "1295_CR79", + "doi-asserted-by": "crossref", + "first-page": "62", + "DOI": "10.1126/science.aad1182", + "volume": "351", + "author": "S van Velzen", + "year": "2016", + "unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", + "journal-title": "Science" + }, + { + "key": "1295_CR80", + "doi-asserted-by": "crossref", + "first-page": "306", + "DOI": "10.1086/670067", + "volume": "125", + "author": "D Foreman-Mackey", + "year": "2013", + "unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", + "journal-title": "Publ. Astron. Soc. Pac." + }, + { + "key": "1295_CR81", + "doi-asserted-by": "crossref", + "first-page": "6", + "DOI": "10.3847/1538-4365/aab761", + "volume": "236", + "author": "J Guillochon", + "year": "2018", + "unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", + "journal-title": "Astrophys. J. 
Suppl. Ser." + }, + { + "key": "1295_CR82", + "doi-asserted-by": "crossref", + "first-page": "e008", + "DOI": "10.1017/pasa.2013.44", + "volume": "31", + "author": "J Granot", + "year": "2014", + "unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", + "journal-title": "Publ. Astron. Soc. Aust." + }, + { + "key": "1295_CR83", + "doi-asserted-by": "crossref", + "first-page": "102", + "DOI": "10.1088/0004-637X/815/2/102", + "volume": "815", + "author": "W Fong", + "year": "2015", + "unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", + "journal-title": "Astrophys. J." + } + ], + "container-title": [ + "Nature Astronomy" + ], + "original-title": [ + ], + "language": "en", + "link": [ + { + "URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", + "content-type": "application/pdf", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "http://www.nature.com/articles/s41550-020-01295-8", + "content-type": "text/html", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", + "content-type": "application/pdf", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2021, + 5, + 17 + ] + ], + "date-time": "2021-05-17T15:08:12Z", + "timestamp": 1621264092000 + }, + "score": 1.0, + "subtitle": [ + ], + "short-title": [ + ], + "issued": { + "date-parts": [ + [ + 3021, + 2, + 22 + ] + ] + }, + "references-count": 83, + "journal-issue": { + "published-print": { + "date-parts": [ + [ + 2021, + 5 + ] + ] + }, + "issue": "5" + }, + "alternative-id": [ + "1295" + ], + "URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", + 
"relation": { + "cites": [ + ] + }, + "ISSN": [ + "2397-3366" + ], + "issn-type": [ + { + "value": "2397-3366", + "type": "electronic" + } + ], + "assertion": [ + { + "value": "21 July 2020", + "order": 1, + "name": "received", + "label": "Received", + "group": { + "name": "ArticleHistory", + "label": "Article History" + } + }, + { + "value": "16 December 2020", + "order": 2, + "name": "accepted", + "label": "Accepted", + "group": { + "name": "ArticleHistory", + "label": "Article History" + } + }, + { + "value": "22 February 2021", + "order": 3, + "name": "first_online", + "label": "First Online", + "group": { + "name": "ArticleHistory", + "label": "Article History" + } + }, + { + "value": "The authors declare no competing interests.", + "order": 1, + "name": "Ethics", + "group": { + "name": "EthicsHeading", + "label": "Competing interests" + } + } + ] } } \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index aba8cee12..bd6dd7cec 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -1,9 +1,14 @@ package eu.dnetlib.dhp.doiboost.crossref +import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.utils.DHPUtils import eu.dnetlib.doiboost.crossref.Crossref2Oaf import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} +import org.json4s +import org.json4s.JsonAST.{JField, JObject, JString} +import org.json4s.{DefaultFormats, JValue} +import org.json4s.jackson.JsonMethods import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test import org.slf4j.{Logger, LoggerFactory} @@ -109,6 +114,44 @@ class CrossrefMappingTest { } + + private 
def parseJson(input:String):JValue = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = JsonMethods.parse(input) + + json + } + + @Test + def testCitationRelations():Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty) + + val result:List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(result.nonEmpty) + + + val j = parseJson(json) + + val doisReference: List[String] = for { + JObject(reference_json) <- j \ "reference" + JField("DOI", JString(doi_json)) <- reference_json + } yield doi_json + + + + val relationList:List[Relation] = result.filter(s => s.isInstanceOf[Relation]).map(r=> r.asInstanceOf[Relation]).filter(r => r.getSubRelType.equalsIgnoreCase(ModelConstants.CITATION)) + + assertNotNull(relationList) + assertFalse(relationList.isEmpty) + + assertEquals(doisReference.size*2, relationList.size) + } + @Test def testEmptyTitle(): Unit = { val json = Source From d617c3e812db928e21791ca583013abebf903c5b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 20 Feb 2023 14:45:27 +0100 Subject: [PATCH 18/18] [DOIBoost] extended mapping for funder #8407 --- .../doiboost/crossref/Crossref2Oaf.scala | 54 +++++++++++++------ .../crossref/CrossrefMappingTest.scala | 27 +++++----- 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index a271f4c33..75aa4a024 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -309,8 +309,6 @@ case object Crossref2Oaf { result } - - def generateAuhtor(given: String, family: String, 
orcid: String, index: Int): Author = { val a = new Author a.setName(given) @@ -372,28 +370,22 @@ case object Crossref2Oaf { case dataset: Dataset => convertDataset(dataset) } - - val doisReference:List[String] = for { - JObject(reference_json) <- json \ "reference" + val doisReference: List[String] = for { + JObject(reference_json) <- json \ "reference" JField("DOI", JString(doi_json)) <- reference_json } yield doi_json - - - if (doisReference!= null && doisReference.nonEmpty) { - val citation_relations:List[Relation] = generateCitationRelations(doisReference, result) + if (doisReference != null && doisReference.nonEmpty) { + val citation_relations: List[Relation] = generateCitationRelations(doisReference, result) resultList = resultList ::: citation_relations } resultList = resultList ::: List(result) resultList } + private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = { - - private def createCiteRelation(source:Result, targetPid:String, targetPidType:String) :List[Relation] = { - - - val targetId = IdentifierFactory.idFromPid("50",targetPidType, targetPid, true) + val targetId = IdentifierFactory.idFromPid("50", targetPidType, targetPid, true) val from = new Relation from.setSource(source.getId) @@ -405,7 +397,6 @@ case object Crossref2Oaf { from.setDataInfo(source.getDataInfo) from.setLastupdatetimestamp(source.getLastupdatetimestamp) - val to = new Relation to.setTarget(source.getId) to.setSource(targetId) @@ -416,10 +407,10 @@ case object Crossref2Oaf { to.setDataInfo(source.getDataInfo) to.setLastupdatetimestamp(source.getLastupdatetimestamp) - List(from,to) + List(from, to) } - def generateCitationRelations(dois:List[String], result:Result):List[Relation] = { + def generateCitationRelations(dois: List[String], result: Result): List[Relation] = { dois.flatMap(d => createCiteRelation(result, d, "doi")) } @@ -495,6 +486,7 @@ case object Crossref2Oaf { case "10.13039/501100000781" => 
generateSimpleRelationFromAward(funder, "corda_______", extractECAward) generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + generateSimpleRelationFromAward(funder, "corda_____he", extractECAward) case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a) case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a) case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a) @@ -536,6 +528,34 @@ case object Crossref2Oaf { val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + //ASAP + case "10.13039/100018231" => generateSimpleRelationFromAward(funder, "asap________", a => a) + //CHIST-ERA + case "10.13039/501100001942" => + val targetId = getProjectId("chistera____", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + //HE + case "10.13039/100018693" | "10.13039/100018694" | "10.13039/100019188" | "10.13039/100019180" | + "10.13039/100018695" | "10.13039/100019185" | "10.13039/100019186" | "10.13039/100019187" => + generateSimpleRelationFromAward(funder, "corda_____he", extractECAward) + //FCT + case "10.13039/501100001871" => + generateSimpleRelationFromAward(funder, "fct_________", extractECAward) + //NHMRC + case "10.13039/501100000925" => + generateSimpleRelationFromAward(funder, "nhmrc_______", extractECAward) + //NIH + case "10.13039/100000002" => + generateSimpleRelationFromAward(funder, "nih_________", extractECAward) + //NWO + case "10.13039/501100003246" => + generateSimpleRelationFromAward(funder, "nwo_________", extractECAward) + //UKRI + case "10.13039/100014013" | "10.13039/501100000267" | "10.13039/501100000268" | 
"10.13039/501100000269" | + "10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" | + "10.13039/501100013589" | "10.13039/501100000271" => + generateSimpleRelationFromAward(funder, "ukri________", extractECAward) case _ => logger.debug("no match for " + funder.DOI.get) diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 572a48372..e0ef0e65c 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -114,8 +114,7 @@ class CrossrefMappingTest { } - - private def parseJson(input:String):JValue = { + private def parseJson(input: String): JValue = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = JsonMethods.parse(input) @@ -123,33 +122,37 @@ class CrossrefMappingTest { } @Test - def testCitationRelations():Unit = { - val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo.json")).mkString - + def testCitationRelations(): Unit = { + val json = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/publication_license_embargo.json")) + .mkString assertNotNull(json) assertFalse(json.isEmpty) - val result:List[Oaf] = Crossref2Oaf.convert(json) + val result: List[Oaf] = Crossref2Oaf.convert(json) assertTrue(result.nonEmpty) - val j = parseJson(json) val doisReference: List[String] = for { - JObject(reference_json) <- j \ "reference" + JObject(reference_json) <- j \ "reference" JField("DOI", JString(doi_json)) <- reference_json } yield doi_json - - - val relationList:List[Relation] = result.filter(s => s.isInstanceOf[Relation]).map(r=> 
r.asInstanceOf[Relation]).filter(r => r.getSubRelType.equalsIgnoreCase(ModelConstants.CITATION)) + val relationList: List[Relation] = result + .filter(s => s.isInstanceOf[Relation]) + .map(r => r.asInstanceOf[Relation]) + .filter(r => r.getSubRelType.equalsIgnoreCase(ModelConstants.CITATION)) assertNotNull(relationList) assertFalse(relationList.isEmpty) - assertEquals(doisReference.size*2, relationList.size) + assertEquals(doisReference.size * 2, relationList.size) + + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + relationList.foreach(p => println(mapper.writeValueAsString(p))) } @Test