From 163b2ee2a8f27755a36de97c2f6115d27b367165 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 13 Jul 2023 15:25:00 +0300 Subject: [PATCH 01/57] Changes 1. Monitor updates 2. Bug fixes during copy to impala cluster --- .../oozie_app/config-default.xml | 30 + .../oozie_app/copyDataToImpalaCluster.sh | 75 ++ .../oozie_app/finalizeImpalaCluster.sh | 29 + .../graph/stats-monitor/oozie_app/monitor.sh | 54 ++ .../oozie_app/scripts/updateMonitorDB.sql | 138 ++++ .../oozie_app/scripts/updateMonitorDBAll.sql | 150 ++++ .../scripts/updateMonitorDB_institutions.sql | 12 + .../stats-monitor/oozie_app/workflow.xml | 110 +++ .../oozie_app/copyDataToImpalaCluster.sh | 8 +- .../stats/oozie_app/finalizeImpalaCluster.sh | 10 +- .../dhp/oa/graph/stats/oozie_app/monitor.sh | 22 +- .../graph/stats/oozie_app/scripts/step15.sql | 11 +- .../scripts/step16-createIndicatorsTables.sql | 718 +++++++++--------- .../scripts/step20-createMonitorDB.sql | 106 +-- .../scripts/step20-createMonitorDBAll.sql | 276 +++++++ .../scripts/step20-createMonitorDB_RIs.sql | 2 +- .../step20-createMonitorDB_RIs_tail.sql | 2 +- .../scripts/step20-createMonitorDB_funded.sql | 2 +- .../step20-createMonitorDB_institutions.sql | 9 +- .../scripts/step21-createObservatoryDB.sql | 38 +- .../dhp/oa/graph/stats/oozie_app/workflow.xml | 18 +- 21 files changed, 1347 insertions(+), 473 deletions(-) create mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh create mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh create mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh create mode 100644 
dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql create mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql create mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql create mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml new file mode 100644 index 000000000..b2a1322e6 --- /dev/null +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + ${jobTracker} + + + nameNode + ${nameNode} + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hive_jdbc_url + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=22166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=15596411699;spark.yarn.driver.memoryOverhead=1228 + + + oozie.wf.workflow.notification.url + {serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status + + \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh 
b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh new file mode 100644 index 000000000..1587f7152 --- /dev/null +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh @@ -0,0 +1,75 @@ +export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! [ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +#export HADOOP_USER_NAME=$2 + +function copydb() { + + export HADOOP_USER="dimitris.pierrakos" + export HADOOP_USER_NAME='dimitris.pierrakos' + + db=$1 + FILE=("hive_wf_tmp_"$RANDOM) + hdfs dfs -mkdir hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ + + # change ownership to impala +# hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db + hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/ + + + # copy the databases from ocean to impala + echo "copying $db" + hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ + + hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db + + # drop tables from db + for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} --delimited -q "show tables"`; + do + `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop table $i;"`; + done + + # drop views from db + for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} --delimited -q "show tables"`; + do + `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop view $i;"`; + done + + # delete the database + impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database if exists ${db} cascade"; + + # create the databases + impala-shell -i impala-cluster-dn1.openaire.eu -q "create 
database ${db}"; + + impala-shell -q "INVALIDATE METADATA" + echo "creating schema for ${db}" + for (( k = 0; k < 5; k ++ )); do + for i in `impala-shell -d ${db} --delimited -q "show tables"`; + do + impala-shell -d ${db} --delimited -q "show create table $i"; + done | sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f - + done + + # load the data from /tmp in the respective tables + echo "copying data in tables and computing stats" + for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} --delimited -q "show tables"`; + do + impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "load data inpath '/tmp/$FILE/${db}.db/$i' into table $i"; + impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "compute stats $i"; + done + + # deleting the remaining directory from hdfs +hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -rm -R /tmp/$FILE/${db}.db +} + +MONITOR_DB=$1 +#HADOOP_USER_NAME=$2 + +copydb $MONITOR_DB'_institutions' +copydb $MONITOR_DB + diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh new file mode 100644 index 000000000..a7227e0c8 --- /dev/null +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh @@ -0,0 +1,29 @@ +export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! 
[ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +function createShadowDB() { + SOURCE=$1 + SHADOW=$2 + + # drop views from db + for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} --delimited -q "show tables"`; + do + `impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "drop view $i;"`; + done + + impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database ${SHADOW} CASCADE"; + impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${SHADOW}"; +# impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "show tables" | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f - + impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f - +} + +MONITOR_DB=$1 +MONITOR_DB_SHADOW=$2 + +createShadowDB $MONITOR_DB'_institutions' $MONITOR_DB'_institutions_shadow' +createShadowDB $MONITOR_DB $MONITOR_DB'_shadow' diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh new file mode 100644 index 000000000..4f1889c9e --- /dev/null +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh @@ -0,0 +1,54 @@ +export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! 
[ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +export SOURCE=$1 +export TARGET=$2 +export SHADOW=$3 +export SCRIPT_PATH=$4 +export SCRIPT_PATH2=$5 +export SCRIPT_PATH3=$6 + +export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" +export HADOOP_USER_NAME="oozie" + +echo "Getting file from " $4 +hdfs dfs -copyToLocal $4 + +echo "Getting file from " $5 +hdfs dfs -copyToLocal $5 + +echo "Getting file from " $6 +hdfs dfs -copyToLocal $6 + +#update Institutions DB +cat updateMonitorDB_institutions.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo +hive $HIVE_OPTS -f foo +cat updateMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo +hive $HIVE_OPTS -f foo + +echo "Hive shell finished" + +echo "Updating shadow monitor institutions database" +hive -e "drop database if exists ${SHADOW}_institutions cascade" +hive -e "create database if not exists ${SHADOW}_institutions" +hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo +hive -f foo +echo "Shadow db monitor institutions ready!" 
+ +#update Monitor DB +cat updateMonitorDBAll.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" > foo +hive $HIVE_OPTS -f foo + +echo "Hive shell finished" + +echo "Updating shadow monitor database" +hive -e "drop database if exists ${SHADOW} cascade" +hive -e "create database if not exists ${SHADOW}" +hive $HIVE_OPTS --database ${2} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${2}.\1;/" > foo +hive -f foo +echo "Shadow db monitor ready!" diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql new file mode 100644 index 000000000..248b7e564 --- /dev/null +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql @@ -0,0 +1,138 @@ +INSERT INTO TARGET.result select * from TARGET.result_new; +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; + +INSERT INTO TARGET.result_citations select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; + +INSERT INTO TARGET.result_references_oc select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS; + +INSERT INTO TARGET.result_classifications select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS; + +INSERT INTO TARGET.result_apc select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; + +INSERT INTO 
TARGET.result_concepts select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; + +INSERT INTO TARGET.result_datasources select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS; + +INSERT INTO TARGET.result_fundercount select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; + +INSERT INTO TARGET.result_gold select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; + +INSERT INTO TARGET.result_greenoa select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; + +INSERT INTO TARGET.result_languages select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; + +INSERT INTO TARGET.result_licenses select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; + +INSERT INTO TARGET.result_oids select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS; + +INSERT INTO TARGET.result_organization select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; + +INSERT INTO TARGET.result_peerreviewed select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result_new r where 
r.id=orig.id); +ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; + +INSERT INTO TARGET.result_pids select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; + +INSERT INTO TARGET.result_projectcount select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; + +INSERT INTO TARGET.result_projects select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; + +INSERT INTO TARGET.result_refereed select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; + +INSERT INTO TARGET.result_sources select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS; + +INSERT INTO TARGET.result_topics select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; + +INSERT INTO TARGET.result_fos select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; + +INSERT INTO TARGET.result_accessroute select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; + +create or replace view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result_new); +create or replace view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result_new); +insert into 
TARGET.result_result select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +drop view TARGET.foo1; +drop view TARGET.foo2; +ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; + + +-- indicators +-- Sprint 1 ---- +INSERT INTO TARGET.indi_pub_green_oa select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_grey_lit select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_doi_from_crossref select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; +-- Sprint 2 ---- +INSERT INTO TARGET.indi_result_has_cc_licence select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; +INSERT INTO TARGET.indi_result_has_cc_licence_url select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_has_abstract select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; +INSERT INTO TARGET.indi_result_with_orcid select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; +---- Sprint 3 ---- +INSERT INTO TARGET.indi_funded_result_with_fundref select * from SOURCE.indi_funded_result_with_fundref 
orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; + +---- Sprint 4 ---- +INSERT INTO TARGET.indi_pub_diamond select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_in_transformative select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_closed_other_open select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; +---- Sprint 5 ---- +INSERT INTO TARGET.indi_result_no_of_copies select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; +---- Sprint 6 ---- +INSERT INTO TARGET.indi_pub_hybrid_oa_with_cc select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_bronze_oa select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_downloads select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_downloads_datasource select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +ANALYZE TABLE 
TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_downloads_year select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_downloads_datasource_year select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; +---- Sprint 7 ---- +INSERT INTO TARGET.indi_pub_gold_oa select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_hybrid select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_has_preprint select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_in_subscribed select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; +INSERT INTO TARGET.indi_result_with_pid select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; +INSERT INTO TARGET.indi_impact_measures select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_interdisciplinarity select * from SOURCE.indi_pub_interdisciplinarity orig where 
exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; + +DROP TABLE IF EXISTS TARGET.result_new; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql new file mode 100644 index 000000000..478e3824e --- /dev/null +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql @@ -0,0 +1,150 @@ +DROP TABLE IF EXISTS TARGET.result_new; + +create table TARGET.result_new as + select distinct * from ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( + 'openorgs____::4d4051b56708688235252f1d8fddb8c1', --Iscte - Instituto Universitário de Lisboa + 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab' -- Universidade Católica Portuguesa + ) )) foo; + +-- INSERT INTO TARGET.result select * from TARGET.result_new; -- duplicate of the insert below; commented out to avoid inserting rows twice +ANALYZE TABLE TARGET.result_new COMPUTE STATISTICS; + +INSERT INTO TARGET.result select * from TARGET.result_new; +ANALYZE TABLE TARGET.result COMPUTE STATISTICS; + +INSERT INTO TARGET.result_citations select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; + +INSERT INTO TARGET.result_references_oc select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS; + +INSERT INTO TARGET.result_classifications select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE 
TARGET.result_classifications COMPUTE STATISTICS; + +INSERT INTO TARGET.result_apc select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; + +INSERT INTO TARGET.result_concepts select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; + +INSERT INTO TARGET.result_datasources select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS; + +INSERT INTO TARGET.result_fundercount select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; + +INSERT INTO TARGET.result_gold select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; + +INSERT INTO TARGET.result_greenoa select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; + +INSERT INTO TARGET.result_languages select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; + +INSERT INTO TARGET.result_licenses select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; + +INSERT INTO TARGET.result_oids select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS; + +INSERT INTO TARGET.result_organization select * from SOURCE.result_organization orig where exists 
(select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; + +INSERT INTO TARGET.result_peerreviewed select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; + +INSERT INTO TARGET.result_pids select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; + +INSERT INTO TARGET.result_projectcount select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; + +INSERT INTO TARGET.result_projects select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; + +INSERT INTO TARGET.result_refereed select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; + +INSERT INTO TARGET.result_sources select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS; + +INSERT INTO TARGET.result_topics select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; + +INSERT INTO TARGET.result_fos select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; + +INSERT INTO TARGET.result_accessroute select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; + +create or replace 
view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result_new); +create or replace view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result_new); +insert into TARGET.result_result select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +drop view TARGET.foo1; +drop view TARGET.foo2; +ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; + + +-- indicators +-- Sprint 1 ---- +INSERT INTO TARGET.indi_pub_green_oa select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_grey_lit select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_doi_from_crossref select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; +-- Sprint 2 ---- +INSERT INTO TARGET.indi_result_has_cc_licence select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; +INSERT INTO TARGET.indi_result_has_cc_licence_url select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_has_abstract select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; +INSERT INTO TARGET.indi_result_with_orcid select * from SOURCE.indi_result_with_orcid orig 
where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; +---- Sprint 3 ---- +INSERT INTO TARGET.indi_funded_result_with_fundref select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; + +---- Sprint 4 ---- +INSERT INTO TARGET.indi_pub_diamond select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_in_transformative select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_closed_other_open select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; +---- Sprint 5 ---- +INSERT INTO TARGET.indi_result_no_of_copies select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; +---- Sprint 6 ---- +INSERT INTO TARGET.indi_pub_hybrid_oa_with_cc select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_bronze_oa select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_downloads select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); 
+ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_downloads_datasource select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_downloads_year select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_downloads_datasource_year select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); +ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; +---- Sprint 7 ---- +INSERT INTO TARGET.indi_pub_gold_oa select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_hybrid select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_has_preprint select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_in_subscribed select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; +INSERT INTO TARGET.indi_result_with_pid select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; +INSERT INTO TARGET.indi_impact_measures select * from 
SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; +INSERT INTO TARGET.indi_pub_interdisciplinarity select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); +ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; + +DROP TABLE IF EXISTS TARGET.result_new; diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql new file mode 100644 index 000000000..236f3733f --- /dev/null +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS TARGET.result_new; + +create table TARGET.result_new as + select distinct * from ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( + 'openorgs____::4d4051b56708688235252f1d8fddb8c1', --Iscte - Instituto Universitário de Lisboa + 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab' -- Universidade Católica Portuguesa + ) )) foo; + +INSERT INTO TARGET.result select * from TARGET.result_new; +ANALYZE TABLE TARGET.result_new COMPUTE STATISTICS; + diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml new file mode 100644 index 000000000..7b999a843 --- /dev/null +++ b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml @@ -0,0 +1,110 @@ 
+ + + + stats_db_name + the target stats database name + + + monitor_db_name + the target monitor db name + + + monitor_db_shadow_name + the name of the shadow monitor db + + + hive_metastore_uris + hive server metastore URIs + + + hive_jdbc_url + hive server jdbc url + + + hive_timeout + the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds. + + + hadoop_user_name + user name of the wf owner + + + + + ${jobTracker} + ${nameNode} + + + hive.metastore.uris + ${hive_metastore_uris} + + + hive.txn.timeout + ${hive_timeout} + + + mapred.job.queue.name + analytics + + + + + + + + ${wf:conf('resumeFrom') eq 'Step1-updateMonitorDB'} + ${wf:conf('resumeFrom') eq 'Step2-copyDataToImpalaCluster'} + ${wf:conf('resumeFrom') eq 'Step3-finalizeImpalaCluster'} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${jobTracker} + ${nameNode} + monitor.sh + ${stats_db_name} + ${monitor_db_name} + ${monitor_db_shadow_name} + ${wf:appPath()}/scripts/updateMonitorDB_institutions.sql + ${wf:appPath()}/scripts/updateMonitorDB.sql + ${wf:appPath()}/scripts/updateMonitorDBAll.sql + monitor.sh + + + + + + + + ${jobTracker} + ${nameNode} + copyDataToImpalaCluster.sh + ${monitor_db_name} + ${hadoop_user_name} + copyDataToImpalaCluster.sh + + + + + + + + ${jobTracker} + ${nameNode} + finalizeImpalaCluster.sh + ${monitor_db_name} + ${monitor_db_shadow_name} + finalizeImpalaCluster.sh + + + + + + + diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh index 87294f6e9..431978997 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh +++
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh @@ -24,13 +24,13 @@ function copydb() { # drop tables from db for i in `impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} --delimited -q "show tables"`; do - `impala-shell -i impala-cluster-dn1.openaire.eu -d -d ${db} -q "drop table $i;"`; + `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop table $i;"`; done # drop views from db for i in `impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -d ${db} --delimited -q "show tables"`; do - `impala-shell -i impala-cluster-dn1.openaire.eu -d -d ${db} -q "drop view $i;"`; + `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop view $i;"`; done # delete the database @@ -82,12 +82,12 @@ copydb $USAGE_STATS_DB copydb $PROD_USAGE_STATS_DB copydb $EXT_DB copydb $STATS_DB -#copydb $MONITOR_DB +copydb $MONITOR_DB copydb $OBSERVATORY_DB copydb $MONITOR_DB'_funded' copydb $MONITOR_DB'_institutions' -copydb $MONITOR_DB'_RIs_tail' +copydb $MONITOR_DB'_ris_tail' contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" for i in ${contexts} diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh index 857635b6c..86a93216c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh @@ -13,7 +13,7 @@ function createShadowDB() { # drop views from db for i in `impala-shell -i 
impala-cluster-dn1.openaire.eu -d ${SHADOW} --delimited -q "show tables"`; do - `impala-shell -i impala-cluster-dn1.openaire.eu -d -d ${SHADOW} -q "drop view $i;"`; + `impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "drop view $i;"`; done impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database ${SHADOW} CASCADE"; @@ -36,13 +36,13 @@ createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW createShadowDB USAGE_STATS_DB USAGE_STATS_DB_SHADOW -createShadowDB $MONITOR_DB'_funded' $MONITOR_DB'_funded_shadow' -createShadowDB $MONITOR_DB'_institutions' $MONITOR_DB'_institutions_shadow' -createShadowDB $MONITOR_DB'_RIs_tail' $MONITOR_DB'_RIs_tail_shadow' +createShadowDB $MONITOR_DB'_funded' $MONITOR_DB_SHADOW'_shadow_funded' +createShadowDB $MONITOR_DB'_institutions' $MONITOR_DB_SHADOW'_shadow_institutions' +createShadowDB $MONITOR_DB'_ris_tail' $MONITOR_DB_SHADOW'_shadow_ris_tail' contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other" for i in ${contexts} do tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'` - createShadowDB ${MONITOR_DB}'_'${tmp} ${MONITOR_DB}'_'${tmp}'_shadow' + createShadowDB ${MONITOR_DB}'_'${tmp} ${MONITOR_DB_SHADOW}'_shadow_'${tmp} done \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh index 08f4c9232..014b19c6c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh @@ -14,6 +14,7 @@ export SCRIPT_PATH2=$5 export SCRIPT_PATH3=$6 
export SCRIPT_PATH4=$7 export SCRIPT_PATH5=$8 +export SCRIPT_PATH6=$9 export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" export HADOOP_USER_NAME="oozie" @@ -33,12 +34,19 @@ hdfs dfs -copyToLocal $7 echo "Getting file from " $8 hdfs dfs -copyToLocal $8 +echo "Getting file from " $9 +hdfs dfs -copyToLocal $9 + + echo "Creating monitor database" +cat step20-createMonitorDBAll.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" > foo +hive $HIVE_OPTS -f foo + cat step20-createMonitorDB_funded.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo hive $HIVE_OPTS -f foo cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo hive $HIVE_OPTS -f foo -# + cat step20-createMonitorDB_institutions.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo hive $HIVE_OPTS -f foo cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo @@ -56,14 +64,20 @@ do hive $HIVE_OPTS -f foo done - -cat step20-createMonitorDB_RIs_tail.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_RIs_tail/g1" | sed "s/CONTEXTS/\"'knowmad::other','dh-ch::other', 'enermaps::other', 'gotriple::other', 'neanias-atmospheric::other', 'rural-digital-europe::other', 'covid-19::other', 'aurora::other', 'neanias-space::other', 'north-america-studies::other', 'north-american-studies::other', 'eutopia::other'\"/g" > foo +cat step20-createMonitorDB_RIs_tail.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_ris_tail/g1" | sed "s/CONTEXTS/\"'knowmad::other','dh-ch::other', 'enermaps::other', 'gotriple::other', 'neanias-atmospheric::other', 'rural-digital-europe::other',
'covid-19::other', 'aurora::other', 'neanias-space::other', 'north-america-studies::other', 'north-american-studies::other', 'eutopia::other'\"/g" > foo hive $HIVE_OPTS -f foo -cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_RIs_tail/g1" > foo +cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_ris_tail/g1" > foo hive $HIVE_OPTS -f foo echo "Hive shell finished" +echo "Updating shadow monitor all database" +hive -e "drop database if exists ${SHADOW} cascade" +hive -e "create database if not exists ${SHADOW}" +hive $HIVE_OPTS --database ${2} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${2}.\1;/" > foo +hive -f foo +echo "Updated shadow monitor all database" + echo "Updating shadow monitor funded database" hive -e "drop database if exists ${SHADOW}_funded cascade" hive -e "create database if not exists ${SHADOW}_funded" diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 132cb482e..75e8b001b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -37,8 +37,17 @@ select * from ${stats_db_name}.otherresearchproduct_refereed; create table if not exists ${stats_db_name}.indi_impact_measures STORED AS PARQUET as select substr(id, 4) as id, measures_ids.id impactmetric, cast(measures_ids.unit.value[0] as double) score, -cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] class +cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] impact_class from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids where 
measures_ids.id!='views' and measures_ids.id!='downloads'; ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; + +create table if not exists ${stats_db_name}.result_apc_affiliations STORED AS PARQUET as +select distinct substr(rel.target,4) id, substr(rel.source,4) organization, o.legalname.value name, +cast(rel.properties[0].value as double) apc_amount, +rel.properties[1].value apc_currency +from ${openaire_db_name}.relation rel +join ${openaire_db_name}.organization o on o.id=rel.source +join ${openaire_db_name}.result r on r.id=rel.target +where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties) > 0; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 36b34cc3c..57c381875 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1,88 +1,88 @@ -- Sprint 1 ---- -create table if not exists indi_pub_green_oa stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as select distinct p.id, coalesce(green_oa, 0) as green_oa -from publication p +from ${stats_db_name}.publication p left outer join ( select p.id, 1 as green_oa - from publication p - join result_instance ri on ri.id = p.id - join datasource on datasource.id = ri.hostedby + from ${stats_db_name}.publication p + join ${stats_db_name}.result_instance ri on ri.id = p.id + join ${stats_db_name}.datasource on datasource.id = ri.hostedby where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 
'Open Source')) tmp on p.id= tmp.id; -ANALYZE TABLE indi_pub_green_oa COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_green_oa COMPUTE STATISTICS; -create table if not exists indi_pub_grey_lit stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as select distinct p.id, coalesce(grey_lit, 0) as grey_lit -from publication p +from ${stats_db_name}.publication p left outer join ( select p.id, 1 as grey_lit - from publication p - join result_classifications rt on rt.id = p.id + from ${stats_db_name}.publication p + join ${stats_db_name}.result_classifications rt on rt.id = p.id where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and - not exists (select 1 from result_classifications rc where type ='Other literature type' + not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; -ANALYZE TABLE indi_pub_grey_lit COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_grey_lit COMPUTE STATISTICS; -create table if not exists indi_pub_doi_from_crossref stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref -from publication p +from ${stats_db_name}.publication p left outer join - (select ri.id, 1 as doi_from_crossref from result_instance ri - join datasource d on d.id = ri.collectedfrom + (select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri + join ${stats_db_name}.datasource d on d.id = ri.collectedfrom where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; -ANALYZE TABLE indi_pub_doi_from_crossref COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_doi_from_crossref COMPUTE STATISTICS; -- Sprint 2 ---- 
-create table if not exists indi_result_has_cc_licence stored as parquet as +create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license -from result r - left outer join (select r.id, license.type as lic from result r - join result_licenses as license on license.id = r.id +from ${stats_db_name}.result r +left outer join (select r.id, license.type as lic from ${stats_db_name}.result r + join ${stats_db_name}.result_licenses as license on license.id = r.id where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp on r.id= tmp.id; -ANALYZE TABLE indi_result_has_cc_licence COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_result_has_cc_licence COMPUTE STATISTICS; -create table if not exists indi_result_has_cc_licence_url stored as parquet as +create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url -from result r +from ${stats_db_name}.result r left outer join (select r.id, lower(parse_url(license.type, "HOST")) as lic_host - from result r - join result_licenses as license on license.id = r.id + from ${stats_db_name}.result r + join ${stats_db_name}.result_licenses as license on license.id = r.id WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; -ANALYZE TABLE indi_result_has_cc_licence_url COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_result_has_cc_licence_url COMPUTE STATISTICS; -create table if not exists indi_pub_has_abstract stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract -from publication; +from ${stats_db_name}.publication; -ANALYZE TABLE indi_pub_has_abstract 
COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_has_abstract COMPUTE STATISTICS; -create table if not exists indi_result_with_orcid stored as parquet as +create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid -from result r - left outer join (select id, 1 as has_orcid from result_orcid) tmp +from ${stats_db_name}.result r + left outer join (select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp on r.id= tmp.id; -ANALYZE TABLE indi_result_with_orcid COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_result_with_orcid COMPUTE STATISTICS; ---- Sprint 3 ---- -create table if not exists indi_funded_result_with_fundref stored as parquet as +create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as select distinct r.result as id, coalesce(fundref, 0) as fundref -from project_results r - left outer join (select distinct result, 1 as fundref from project_results +from ${stats_db_name}.project_results r + left outer join (select distinct result, 1 as fundref from ${stats_db_name}.project_results where provenance='Harvested') tmp on r.result= tmp.result; -ANALYZE TABLE indi_funded_result_with_fundref COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_funded_result_with_fundref COMPUTE STATISTICS; -- create table indi_result_org_collab stored as parquet as -- select o1.organization org1, o2.organization org2, count(distinct o1.id) as collaborations @@ -92,68 +92,68 @@ ANALYZE TABLE indi_funded_result_with_fundref COMPUTE STATISTICS; -- -- compute stats indi_result_org_collab; -- -create TEMPORARY TABLE tmp AS SELECT ro.organization organization, ro.id, o.name from result_organization ro -join organization o on o.id=ro.organization where o.name is not null; +create TEMPORARY TABLE ${stats_db_name}.tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro +join 
${stats_db_name}.organization o on o.id=ro.organization where o.name is not null; -create table if not exists indi_result_org_collab stored as parquet as +create table if not exists ${stats_db_name}.indi_result_org_collab stored as parquet as select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations -from tmp as o1 -join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name +from ${stats_db_name}.tmp as o1 +join ${stats_db_name}.tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name group by o1.organization, o2.organization, o1.name, o2.name; -drop table tmp purge; +drop table ${stats_db_name}.tmp purge; -ANALYZE TABLE indi_result_org_collab COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_result_org_collab COMPUTE STATISTICS; -create TEMPORARY TABLE tmp AS -select distinct ro.organization organization, ro.id, o.name, o.country from result_organization ro -join organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; +create TEMPORARY TABLE ${stats_db_name}.tmp AS +select distinct ro.organization organization, ro.id, o.name, o.country from ${stats_db_name}.result_organization ro +join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; -create table if not exists indi_result_org_country_collab stored as parquet as +create table if not exists ${stats_db_name}.indi_result_org_country_collab stored as parquet as select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations -from tmp as o1 join tmp as o2 on o1.id=o2.id +from ${stats_db_name}.tmp as o1 join ${stats_db_name}.tmp as o2 on o1.id=o2.id where o1.id=o2.id and o1.country!=o2.country group by o1.organization, o1.id, o1.name, o2.country; -drop table tmp purge; +drop table ${stats_db_name}.tmp purge; -ANALYZE TABLE indi_result_org_country_collab COMPUTE 
STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_result_org_country_collab COMPUTE STATISTICS; -create TEMPORARY TABLE tmp AS -select o.id organization, o.name, ro.project as project from organization o - join organization_projects ro on o.id=ro.id where o.name is not null; +create TEMPORARY TABLE ${stats_db_name}.tmp AS +select o.id organization, o.name, ro.project as project from ${stats_db_name}.organization o + join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; -create table if not exists indi_project_collab_org stored as parquet as +create table if not exists ${stats_db_name}.indi_project_collab_org stored as parquet as select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations -from tmp as o1 - join tmp as o2 on o1.project=o2.project +from ${stats_db_name}.tmp as o1 + join ${stats_db_name}.tmp as o2 on o1.project=o2.project where o1.organization<>o2.organization and o1.name<>o2.name group by o1.name,o2.name, o1.organization, o2.organization; -drop table tmp purge; +drop table ${stats_db_name}.tmp purge; -ANALYZE TABLE indi_project_collab_org COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_project_collab_org COMPUTE STATISTICS; -create TEMPORARY TABLE tmp AS -select o.id organization, o.name, o.country , ro.project as project from organization o - join organization_projects ro on o.id=ro.id +create TEMPORARY TABLE ${stats_db_name}.tmp AS +select o.id organization, o.name, o.country , ro.project as project from ${stats_db_name}.organization o + join ${stats_db_name}.organization_projects ro on o.id=ro.id and o.country <> 'UNKNOWN' and o.name is not null; -create table if not exists indi_project_collab_org_country stored as parquet as +create table if not exists ${stats_db_name}.indi_project_collab_org_country stored as parquet as select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations 
-from tmp as o1 - join tmp as o2 on o1.project=o2.project +from ${stats_db_name}.tmp as o1 + join ${stats_db_name}.tmp as o2 on o1.project=o2.project where o1.organization<>o2.organization and o1.country<>o2.country group by o1.organization, o2.country, o1.name; -drop table tmp purge; +drop table ${stats_db_name}.tmp purge; -ANALYZE TABLE indi_project_collab_org_country COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_project_collab_org_country COMPUTE STATISTICS; -create table if not exists indi_funder_country_collab stored as parquet as - with tmp as (select funder, project, country from organization_projects op - join organization o on o.id=op.id - join project p on p.id=op.project +create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as + with tmp as (select funder, project, country from ${stats_db_name}.organization_projects op + join ${stats_db_name}.organization o on o.id=op.id + join ${stats_db_name}.project p on p.id=op.project where country <> 'UNKNOWN') select f1.funder, f1.country as country1, f2.country as country2, count(distinct f1.project) as collaborations from tmp as f1 @@ -161,104 +161,104 @@ from tmp as f1 where f1.country<>f2.country group by f1.funder, f2.country, f1.country; -ANALYZE TABLE indi_funder_country_collab COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_funder_country_collab COMPUTE STATISTICS; -create TEMPORARY TABLE tmp AS -select distinct country, ro.id as result from organization o - join result_organization ro on o.id=ro.organization +create TEMPORARY TABLE ${stats_db_name}.tmp AS +select distinct country, ro.id as result from ${stats_db_name}.organization o + join ${stats_db_name}.result_organization ro on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; -create table if not exists indi_result_country_collab stored as parquet as +create table if not exists ${stats_db_name}.indi_result_country_collab stored as parquet as select o1.country country1, 
o2.country country2, count(o1.result) as collaborations -from tmp as o1 - join tmp as o2 on o1.result=o2.result +from ${stats_db_name}.tmp as o1 + join ${stats_db_name}.tmp as o2 on o1.result=o2.result where o1.country<>o2.country group by o1.country, o2.country; -drop table tmp purge; +drop table ${stats_db_name}.tmp purge; -ANALYZE TABLE indi_result_country_collab COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_result_country_collab COMPUTE STATISTICS; ---- Sprint 4 ---- -create table if not exists indi_pub_diamond stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal -from publication_datasources pd +from ${stats_db_name}.publication_datasources pd left outer join ( - select pd.id, 1 as in_diamond_journal from publication_datasources pd - join datasource d on d.id=pd.datasource + select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd + join ${stats_db_name}.datasource d on d.id=pd.datasource join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp on pd.id=tmp.id; -ANALYZE TABLE indi_pub_diamond COMPUTE STATISTICS; +----ANALYZE TABLE ${stats_db_name}.indi_pub_diamond COMPUTE STATISTICS; -create table if not exists indi_pub_in_transformative stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative -from publication pd +from ${stats_db_name}.publication pd left outer join ( - select pd.id, 1 as is_transformative from publication_datasources pd - join datasource d on d.id=pd.datasource + select pd.id, 1 as is_transformative from ${stats_db_name}.publication_datasources pd + join ${stats_db_name}.datasource d on d.id=pd.datasource join 
STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and ps.is_transformative_journal=true) tmp on pd.id=tmp.id; -ANALYZE TABLE indi_pub_in_transformative COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_in_transformative COMPUTE STATISTICS; -create table if not exists indi_pub_closed_other_open stored as parquet as -select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri +create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as +select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from ${stats_db_name}.result_instance ri left outer join - (select ri.id, 1 as pub_closed_other_open from result_instance ri - join publication p on p.id=ri.id - join datasource d on ri.hostedby=d.id + (select ri.id, 1 as pub_closed_other_open from ${stats_db_name}.result_instance ri + join ${stats_db_name}.publication p on p.id=ri.id + join ${stats_db_name}.datasource d on ri.hostedby=d.id where d.type like '%Journal%' and ri.accessright='Closed Access' and (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp on tmp.id=ri.id; -ANALYZE TABLE indi_pub_closed_other_open COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_closed_other_open COMPUTE STATISTICS; ---- Sprint 5 ---- -create table if not exists indi_result_no_of_copies stored as parquet as -select id, count(id) as number_of_copies from result_instance group by id; +create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as +select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; -ANALYZE TABLE indi_result_no_of_copies COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_result_no_of_copies COMPUTE STATISTICS; ---- Sprint 6 ---- -create table if not exists indi_pub_downloads stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_downloads stored as 
parquet as SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats - join publication on result_id=id + join ${stats_db_name}.publication on result_id=id where downloads>0 GROUP BY result_id order by no_downloads desc; -ANALYZE TABLE indi_pub_downloads COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_downloads COMPUTE STATISTICS; -create table if not exists indi_pub_downloads_datasource stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats - join publication on result_id=id + join ${stats_db_name}.publication on result_id=id where downloads>0 GROUP BY result_id, repository_id order by result_id; -ANALYZE TABLE indi_pub_downloads_datasource COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_downloads_datasource COMPUTE STATISTICS; -create table if not exists indi_pub_downloads_year stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us -join publication on result_id=id where downloads>0 +join ${stats_db_name}.publication on result_id=id where downloads>0 GROUP BY result_id, substring(us.`date`, 1,4); -ANALYZE TABLE indi_pub_downloads_year COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_downloads_year COMPUTE STATISTICS; -create table if not exists indi_pub_downloads_datasource_year stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us -join publication on result_id=id +join ${stats_db_name}.publication on result_id=id where downloads>0 
GROUP BY result_id, repository_id, substring(us.`date`, 1,4); -ANALYZE TABLE indi_pub_downloads_datasource_year COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_downloads_datasource_year COMPUTE STATISTICS; ---- Sprint 7 ---- -create table if not exists indi_pub_gold_oa stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as WITH gold_oa AS ( SELECT issn_l, journal_is_in_doaj, @@ -284,7 +284,7 @@ create table if not exists indi_pub_gold_oa stored as parquet as id, issn_printed as issn FROM - datasource + ${stats_db_name}.datasource WHERE issn_printed IS NOT NULL UNION ALL @@ -292,7 +292,7 @@ create table if not exists indi_pub_gold_oa stored as parquet as id, issn_online as issn FROM - datasource + ${stats_db_name}.datasource WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn WHERE @@ -300,16 +300,16 @@ create table if not exists indi_pub_gold_oa stored as parquet as SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold FROM - publication_datasources pd + ${stats_db_name}.publication_datasources pd left outer join( - select pd.id, 1 as is_gold FROM publication_datasources pd + select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd JOIN issn on issn.id=pd.datasource JOIN gold_oa on issn.issn = gold_oa.issn) tmp on pd.id=tmp.id; -ANALYZE TABLE indi_pub_gold_oa COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_gold_oa COMPUTE STATISTICS; -create table if not exists indi_pub_hybrid_oa_with_cc stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as WITH hybrid_oa AS ( SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn FROM STATS_EXT.plan_s_jn @@ -322,27 +322,27 @@ create table if not exists indi_pub_hybrid_oa_with_cc stored as parquet as SELECT * FROM ( SELECT id, issn_printed as issn - FROM datasource + FROM ${stats_db_name}.datasource WHERE issn_printed IS NOT NULL UNION ALL SELECT 
id,issn_online as issn - FROM datasource + FROM ${stats_db_name}.datasource WHERE issn_online IS NOT NULL ) as issn WHERE LENGTH(issn) > 7) SELECT DISTINCT pd.id, coalesce(is_hybrid_oa, 0) as is_hybrid_oa -FROM publication_datasources pd +FROM ${stats_db_name}.publication_datasources pd LEFT OUTER JOIN ( - SELECT pd.id, 1 as is_hybrid_oa from publication_datasources pd - JOIN datasource d on d.id=pd.datasource + SELECT pd.id, 1 as is_hybrid_oa from ${stats_db_name}.publication_datasources pd + JOIN ${stats_db_name}.datasource d on d.id=pd.datasource JOIN issn on issn.id=pd.datasource JOIN hybrid_oa ON issn.issn = hybrid_oa.issn - JOIN indi_result_has_cc_licence cc on pd.id=cc.id - JOIN indi_pub_gold_oa ga on pd.id=ga.id + JOIN ${stats_db_name}.indi_result_has_cc_licence cc on pd.id=cc.id + JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; -ANALYZE TABLE indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; -create table if not exists indi_pub_hybrid stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as WITH gold_oa AS ( SELECT issn_l, journal_is_in_doaj, @@ -370,7 +370,7 @@ create table if not exists indi_pub_hybrid stored as parquet as id, issn_printed as issn FROM - datasource + ${stats_db_name}.datasource WHERE issn_printed IS NOT NULL UNION ALL @@ -378,424 +378,398 @@ create table if not exists indi_pub_hybrid stored as parquet as id, issn_online as issn FROM - datasource + ${stats_db_name}.datasource WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn WHERE LENGTH(issn) > 7) select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid -from publication_datasources pd +from ${stats_db_name}.publication_datasources pd left outer join ( - select pd.id, 1 as is_hybrid from publication_datasources pd - join datasource d on d.id=pd.datasource + select pd.id, 1 
as is_hybrid from ${stats_db_name}.publication_datasources pd + join ${stats_db_name}.datasource d on d.id=pd.datasource join issn on issn.id=pd.datasource join gold_oa on issn.issn=gold_oa.issn where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp on pd.id=tmp.id; -ANALYZE TABLE indi_pub_hybrid COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_hybrid COMPUTE STATISTICS; -create table if not exists indi_org_fairness stored as parquet as +create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as --return results with PIDs, and rich metadata group by organization with result_fair as - (select ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro - join result r on r.id=ro.id + (select ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result r on r.id=ro.id --join result_pids rp on r.id=rp.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 group by ro.organization), --return all results group by organization - allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro - join result r on r.id=ro.id + allresults as (select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result r on r.id=ro.id where cast(year as int)>2003 - group by organization) + group by ro.organization) --return results_fair/all_results select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness from allresults join result_fair on result_fair.organization=allresults.organization; -ANALYZE TABLE indi_org_fairness COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_org_fairness COMPUTE STATISTICS; -create table if not exists indi_org_fairness_pub_pr stored as parquet as - 
with result_fair as - (select ro.organization organization, count(distinct ro.id) no_result_fair - from result_organization ro - join publication p on p.id=ro.id - join indi_pub_doi_from_crossref dc on dc.id=p.id - join indi_pub_grey_lit gl on gl.id=p.id +CREATE TEMPORARY table ${stats_db_name}.result_fair as +select ro.organization organization, count(distinct ro.id) no_result_fair + from ${stats_db_name}.result_organization ro + join ${stats_db_name}.publication p on p.id=ro.id + join ${stats_db_name}.indi_pub_doi_from_crossref dc on dc.id=p.id + join ${stats_db_name}.indi_pub_grey_lit gl on gl.id=p.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 and dc.doi_from_crossref=1 and gl.grey_lit=0 - group by ro.organization), - allresults as (select organization, count(distinct ro.id) no_allresults from result_organization ro - join publication p on p.id=ro.id + group by ro.organization; + +CREATE TEMPORARY TABLE ${stats_db_name}.allresults as +select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro + join ${stats_db_name}.publication p on p.id=ro.id where cast(year as int)>2003 - group by organization) ---return results_fair/all_results -select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness -from allresults - join result_fair on result_fair.organization=allresults.organization; + group by ro.organization; -ANALYZE TABLE indi_org_fairness_pub_pr COMPUTE STATISTICS; +create table if not exists ${stats_db_name}.indi_org_fairness_pub_pr stored as parquet as +select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness +from ${stats_db_name}.allresults ar + join ${stats_db_name}.result_fair rf on rf.organization=ar.organization; -CREATE TEMPORARY table result_fair as - select year, ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro - 
join result p on p.id=ro.id +DROP table ${stats_db_name}.result_fair purge; +DROP table ${stats_db_name}.allresults purge; + +--ANALYZE TABLE ${stats_db_name}.indi_org_fairness_pub_pr COMPUTE STATISTICS; + +CREATE TEMPORARY table ${stats_db_name}.result_fair as + select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result p on p.id=ro.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 group by ro.organization, year; -CREATE TEMPORARY TABLE allresults as select year, organization, count(distinct ro.id) no_allresults from result_organization ro - join result p on p.id=ro.id +CREATE TEMPORARY TABLE ${stats_db_name}.allresults as select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result p on p.id=ro.id where cast(year as int)>2003 - group by organization, year; + group by ro.organization, year; -create table if not exists indi_org_fairness_pub_year stored as parquet as +create table if not exists ${stats_db_name}.indi_org_fairness_pub_year stored as parquet as select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness -from allresults - join result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; +from ${stats_db_name}.allresults + join ${stats_db_name}.result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; -DROP table result_fair purge; -DROP table allresults purge; +DROP table ${stats_db_name}.result_fair purge; +DROP table ${stats_db_name}.allresults purge; -ANALYZE TABLE indi_org_fairness_pub_year COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_org_fairness_pub_year COMPUTE STATISTICS; -CREATE TEMPORARY TABLE result_fair as +CREATE TEMPORARY 
TABLE ${stats_db_name}.result_fair as select ro.organization organization, count(distinct ro.id) no_result_fair - from result_organization ro - join result p on p.id=ro.id + from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result p on p.id=ro.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 group by ro.organization; -CREATE TEMPORARY TABLE allresults as - select organization, count(distinct ro.id) no_allresults from result_organization ro - join result p on p.id=ro.id +CREATE TEMPORARY TABLE ${stats_db_name}.allresults as + select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result p on p.id=ro.id where cast(year as int)>2003 - group by organization; + group by ro.organization; -create table if not exists indi_org_fairness_pub as -select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness -from allresults join result_fair on result_fair.organization=allresults.organization; +create table if not exists ${stats_db_name}.indi_org_fairness_pub as +select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness +from ${stats_db_name}.allresults ar join ${stats_db_name}.result_fair rf +on rf.organization=ar.organization; -DROP table result_fair purge; -DROP table allresults purge; +DROP table ${stats_db_name}.result_fair purge; +DROP table ${stats_db_name}.allresults purge; -ANALYZE TABLE indi_org_fairness_pub COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_org_fairness_pub COMPUTE STATISTICS; -CREATE TEMPORARY TABLE result_fair as - select year, ro.organization organization, count(distinct ro.id) no_result_fair from result_organization ro - join result r on r.id=ro.id - join result_pids rp on r.id=rp.id +CREATE TEMPORARY TABLE ${stats_db_name}.result_fair as + select year, ro.organization organization, count(distinct ro.id) 
no_result_fair from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result r on r.id=ro.id + join ${stats_db_name}.result_pids rp on r.id=rp.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 group by ro.organization, year; -CREATE TEMPORARY TABLE allresults as - select year, organization, count(distinct ro.id) no_allresults from result_organization ro - join result r on r.id=ro.id +CREATE TEMPORARY TABLE ${stats_db_name}.allresults as + select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result r on r.id=ro.id where cast(year as int)>2003 - group by organization, year; + group by ro.organization, year; -create table if not exists indi_org_fairness_year stored as parquet as +create table if not exists ${stats_db_name}.indi_org_fairness_year stored as parquet as select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness - from allresults - join result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; + from ${stats_db_name}.allresults + join ${stats_db_name}.result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; -DROP table result_fair purge; -DROP table allresults purge; +DROP table ${stats_db_name}.result_fair purge; +DROP table ${stats_db_name}.allresults purge; -ANALYZE TABLE indi_org_fairness_year COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_org_fairness_year COMPUTE STATISTICS; -CREATE TEMPORARY TABLE result_with_pid as - select year, ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro - join result_pids rp on rp.id=ro.id - join result r on r.id=rp.id +CREATE TEMPORARY TABLE ${stats_db_name}.result_with_pid as + select year, ro.organization, count(distinct rp.id) 
no_result_with_pid from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result_pids rp on rp.id=ro.id + join ${stats_db_name}.result r on r.id=rp.id where cast(year as int) >2003 group by ro.organization, year; -CREATE TEMPORARY TABLE allresults as - select year, organization, count(distinct ro.id) no_allresults from result_organization ro - join result r on r.id=ro.id +CREATE TEMPORARY TABLE ${stats_db_name}.allresults as + select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result r on r.id=ro.id where cast(year as int) >2003 - group by organization, year; + group by ro.organization, year; -create table if not exists indi_org_findable_year stored as parquet as +create table if not exists ${stats_db_name}.indi_org_findable_year stored as parquet as select allresults.year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable -from allresults - join result_with_pid on result_with_pid.organization=allresults.organization and result_with_pid.year=allresults.year; +from ${stats_db_name}.allresults + join ${stats_db_name}.result_with_pid on result_with_pid.organization=allresults.organization and result_with_pid.year=allresults.year; -DROP table result_with_pid purge; -DROP table allresults purge; +DROP table ${stats_db_name}.result_with_pid purge; +DROP table ${stats_db_name}.allresults purge; -ANALYZE TABLE indi_org_findable_year COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_org_findable_year COMPUTE STATISTICS; -CREATE TEMPORARY TABLE result_with_pid as -select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro - join result_pids rp on rp.id=ro.id - join result r on r.id=rp.id +CREATE TEMPORARY TABLE ${stats_db_name}.result_with_pid as +select ro.organization, count(distinct rp.id) no_result_with_pid from ${stats_db_name}.result_organization ro + join 
${stats_db_name}.result_pids rp on rp.id=ro.id + join ${stats_db_name}.result r on r.id=rp.id where cast(year as int) >2003 group by ro.organization; -CREATE TEMPORARY TABLE allresults as -select organization, count(distinct ro.id) no_allresults from result_organization ro - join result r on r.id=ro.id +CREATE TEMPORARY TABLE ${stats_db_name}.allresults as +select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro + join ${stats_db_name}.result r on r.id=ro.id where cast(year as int) >2003 - group by organization; + group by ro.organization; -create table if not exists indi_org_findable stored as parquet as +create table if not exists ${stats_db_name}.indi_org_findable stored as parquet as select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable -from allresults - join result_with_pid on result_with_pid.organization=allresults.organization; +from ${stats_db_name}.allresults + join ${stats_db_name}.result_with_pid on result_with_pid.organization=allresults.organization; -DROP table result_with_pid purge; -DROP table allresults purge; +DROP table ${stats_db_name}.result_with_pid purge; +DROP table ${stats_db_name}.allresults purge; -ANALYZE TABLE indi_org_findable COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_org_findable COMPUTE STATISTICS; -CREATE TEMPORARY TABLE pubs_oa as -SELECT ro.organization, count(distinct r.id) no_oapubs FROM publication r - join result_organization ro on ro.id=r.id - join result_instance ri on ri.id=r.id +CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as +SELECT ro.organization, count(distinct r.id) no_oapubs FROM ${stats_db_name}.publication r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 group by ro.organization; -CREATE TEMPORARY 
TABLE datasets_oa as -SELECT ro.organization, count(distinct r.id) no_oadatasets FROM dataset r - join result_organization ro on ro.id=r.id - join result_instance ri on ri.id=r.id +CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa as +SELECT ro.organization, count(distinct r.id) no_oadatasets FROM ${stats_db_name}.dataset r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 group by ro.organization; -CREATE TEMPORARY TABLE software_oa as -SELECT ro.organization, count(distinct r.id) no_oasoftware FROM software r - join result_organization ro on ro.id=r.id - join result_instance ri on ri.id=r.id +CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as +SELECT ro.organization, count(distinct r.id) no_oasoftware FROM ${stats_db_name}.software r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 group by ro.organization; -CREATE TEMPORARY TABLE allpubs as -SELECT ro.organization organization, count(ro.id) no_allpubs FROM result_organization ro - join publication ps on ps.id=ro.id +CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as +SELECT ro.organization, count(ro.id) no_allpubs FROM ${stats_db_name}.result_organization ro + join ${stats_db_name}.publication ps on ps.id=ro.id where cast(ps.year as int)>2003 group by ro.organization; -CREATE TEMPORARY TABLE alldatasets as -SELECT ro.organization organization, count(ro.id) no_alldatasets FROM result_organization ro - join dataset ps on ps.id=ro.id +CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +SELECT ro.organization, count(ro.id) no_alldatasets FROM ${stats_db_name}.result_organization ro + join ${stats_db_name}.dataset 
ps on ps.id=ro.id where cast(ps.year as int)>2003 group by ro.organization; -CREATE TEMPORARY TABLE allsoftware as -SELECT ro.organization organization, count(ro.id) no_allsoftware FROM result_organization ro - join software ps on ps.id=ro.id +CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +SELECT ro.organization, count(ro.id) no_allsoftware FROM ${stats_db_name}.result_organization ro + join ${stats_db_name}.software ps on ps.id=ro.id where cast(ps.year as int)>2003 group by ro.organization; -CREATE TEMPORARY TABLE allpubsshare as -select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs - join pubs_oa on allpubs.organization=pubs_oa.organization; +CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as +select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs + join ${stats_db_name}.pubs_oa on allpubs.organization=pubs_oa.organization; -CREATE TEMPORARY TABLE alldatasetssshare as +CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from alldatasets - join datasets_oa on alldatasets.organization=datasets_oa.organization; + from ${stats_db_name}.alldatasets + join ${stats_db_name}.datasets_oa on alldatasets.organization=datasets_oa.organization; -CREATE TEMPORARY TABLE allsoftwaresshare as +CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s - from allsoftware - join software_oa on allsoftware.organization=software_oa.organization; + from ${stats_db_name}.allsoftware + join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization; -create table if not exists indi_org_openess stored as parquet as +create table if not exists ${stats_db_name}.indi_org_openess stored as parquet as select allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null 
then 0 else 1 end) +(case when d is null then 0 else 1 end)) - org_openess FROM allpubsshare + org_openess FROM ${stats_db_name}.allpubsshare left outer join (select organization,d from - alldatasetssshare) tmp1 + ${stats_db_name}.alldatasetssshare) tmp1 on tmp1.organization=allpubsshare.organization left outer join (select organization,s from - allsoftwaresshare) tmp2 + ${stats_db_name}.allsoftwaresshare) tmp2 on tmp2.organization=allpubsshare.organization; -DROP TABLE pubs_oa purge; -DROP TABLE datasets_oa purge; -DROP TABLE software_oa purge; -DROP TABLE allpubs purge; -DROP TABLE alldatasets purge; -DROP TABLE allsoftware purge; -DROP TABLE allpubsshare purge; -DROP TABLE alldatasetssshare purge; -DROP TABLE allsoftwaresshare purge; +DROP TABLE ${stats_db_name}.pubs_oa purge; +DROP TABLE ${stats_db_name}.datasets_oa purge; +DROP TABLE ${stats_db_name}.software_oa purge; +DROP TABLE ${stats_db_name}.allpubs purge; +DROP TABLE ${stats_db_name}.alldatasets purge; +DROP TABLE ${stats_db_name}.allsoftware purge; +DROP TABLE ${stats_db_name}.allpubsshare purge; +DROP TABLE ${stats_db_name}.alldatasetssshare purge; +DROP TABLE ${stats_db_name}.allsoftwaresshare purge; -ANALYZE TABLE indi_org_openess COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_org_openess COMPUTE STATISTICS; -CREATE TEMPORARY TABLE pubs_oa AS -SELECT r.year, ro.organization, count(distinct r.id) no_oapubs FROM publication r - join result_organization ro on ro.id=r.id - join result_instance ri on ri.id=r.id +CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa AS +SELECT r.year, ro.organization, count(distinct r.id) no_oapubs FROM ${stats_db_name}.publication r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 group by ro.organization,r.year; -CREATE TEMPORARY TABLE datasets_oa AS -SELECT 
r.year,ro.organization, count(distinct r.id) no_oadatasets FROM dataset r - join result_organization ro on ro.id=r.id - join result_instance ri on ri.id=r.id +CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa AS +SELECT r.year,ro.organization, count(distinct r.id) no_oadatasets FROM ${stats_db_name}.dataset r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 group by ro.organization, r.year; -CREATE TEMPORARY TABLE software_oa AS -SELECT r.year,ro.organization, count(distinct r.id) no_oasoftware FROM software r - join result_organization ro on ro.id=r.id - join result_instance ri on ri.id=r.id +CREATE TEMPORARY TABLE ${stats_db_name}.software_oa AS +SELECT r.year,ro.organization, count(distinct r.id) no_oasoftware FROM ${stats_db_name}.software r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 group by ro.organization, r.year; -CREATE TEMPORARY TABLE allpubs as -SELECT p.year,ro.organization organization, count(ro.id) no_allpubs FROM result_organization ro - join publication p on p.id=ro.id where cast(p.year as int)>2003 +CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as +SELECT p.year,ro.organization organization, count(ro.id) no_allpubs FROM ${stats_db_name}.result_organization ro + join ${stats_db_name}.publication p on p.id=ro.id where cast(p.year as int)>2003 group by ro.organization, p.year; -CREATE TEMPORARY TABLE alldatasets as -SELECT d.year, ro.organization organization, count(ro.id) no_alldatasets FROM result_organization ro - join dataset d on d.id=ro.id where cast(d.year as int)>2003 +CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +SELECT d.year, 
ro.organization organization, count(ro.id) no_alldatasets FROM ${stats_db_name}.result_organization ro + join ${stats_db_name}.dataset d on d.id=ro.id where cast(d.year as int)>2003 group by ro.organization, d.year; -CREATE TEMPORARY TABLE allsoftware as -SELECT s.year,ro.organization organization, count(ro.id) no_allsoftware FROM result_organization ro - join software s on s.id=ro.id where cast(s.year as int)>2003 +CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +SELECT s.year,ro.organization organization, count(ro.id) no_allsoftware FROM ${stats_db_name}.result_organization ro + join ${stats_db_name}.software s on s.id=ro.id where cast(s.year as int)>2003 group by ro.organization, s.year; -CREATE TEMPORARY TABLE allpubsshare as -select allpubs.year, pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs - join pubs_oa on allpubs.organization=pubs_oa.organization where cast(allpubs.year as INT)=cast(pubs_oa.year as int); +CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as +select allpubs.year, pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs + join ${stats_db_name}.pubs_oa on allpubs.organization=pubs_oa.organization where cast(allpubs.year as INT)=cast(pubs_oa.year as int); -CREATE TEMPORARY TABLE alldatasetssshare as +CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as select alldatasets.year, datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from alldatasets - join datasets_oa on alldatasets.organization=datasets_oa.organization where cast(alldatasets.year as INT)=cast(datasets_oa.year as int); + from ${stats_db_name}.alldatasets + join ${stats_db_name}.datasets_oa on alldatasets.organization=datasets_oa.organization where cast(alldatasets.year as INT)=cast(datasets_oa.year as int); -CREATE TEMPORARY TABLE allsoftwaresshare as +CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as select allsoftware.year, software_oa.organization, 
software_oa.no_oasoftware/allsoftware.no_allsoftware s - from allsoftware - join software_oa on allsoftware.organization=software_oa.organization where cast(allsoftware.year as INT)=cast(software_oa.year as int); + from ${stats_db_name}.allsoftware + join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization where cast(allsoftware.year as INT)=cast(software_oa.year as int); -create table if not exists indi_org_openess_year stored as parquet as +create table if not exists ${stats_db_name}.indi_org_openess_year stored as parquet as select allpubsshare.year, allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - org_openess FROM allpubsshare + org_openess FROM ${stats_db_name}.allpubsshare left outer join (select year, organization,d from - alldatasetssshare) tmp1 + ${stats_db_name}.alldatasetssshare) tmp1 on tmp1.organization=allpubsshare.organization and tmp1.year=allpubsshare.year left outer join (select year, organization,s from - allsoftwaresshare) tmp2 + ${stats_db_name}.allsoftwaresshare) tmp2 on tmp2.organization=allpubsshare.organization and tmp2.year=allpubsshare.year; -DROP TABLE pubs_oa purge; -DROP TABLE datasets_oa purge; -DROP TABLE software_oa purge; -DROP TABLE allpubs purge; -DROP TABLE alldatasets purge; -DROP TABLE allsoftware purge; -DROP TABLE allpubsshare purge; -DROP TABLE alldatasetssshare purge; -DROP TABLE allsoftwaresshare purge; +DROP TABLE ${stats_db_name}.pubs_oa purge; +DROP TABLE ${stats_db_name}.datasets_oa purge; +DROP TABLE ${stats_db_name}.software_oa purge; +DROP TABLE ${stats_db_name}.allpubs purge; +DROP TABLE ${stats_db_name}.alldatasets purge; +DROP TABLE ${stats_db_name}.allsoftware purge; +DROP TABLE ${stats_db_name}.allpubsshare purge; +DROP TABLE ${stats_db_name}.alldatasetssshare purge; +DROP TABLE ${stats_db_name}.allsoftwaresshare purge; -ANALYZE TABLE indi_org_openess_year COMPUTE STATISTICS; 
+--ANALYZE TABLE ${stats_db_name}.indi_org_openess_year COMPUTE STATISTICS; -create table if not exists indi_pub_has_preprint stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_has_preprint stored as parquet as select distinct p.id, coalesce(has_preprint, 0) as has_preprint -from publication_classifications p +from ${stats_db_name}.publication_classifications p left outer join ( select p.id, 1 as has_preprint - from publication_classifications p + from ${stats_db_name}.publication_classifications p where p.type='Preprint') tmp on p.id= tmp.id; -ANALYZE TABLE indi_pub_has_preprint COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_has_preprint COMPUTE STATISTICS; -create table if not exists indi_pub_in_subscribed stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_in_subscribed stored as parquet as select distinct p.id, coalesce(is_subscription, 0) as is_subscription -from publication p +from ${stats_db_name}.publication p left outer join( - select p.id, 1 as is_subscription from publication p - join indi_pub_gold_oa g on p.id=g.id - join indi_pub_hybrid h on p.id=h.id - join indi_pub_in_transformative t on p.id=t.id + select p.id, 1 as is_subscription from ${stats_db_name}.publication p + join ${stats_db_name}.indi_pub_gold_oa g on p.id=g.id + join ${stats_db_name}.indi_pub_hybrid h on p.id=h.id + join ${stats_db_name}.indi_pub_in_transformative t on p.id=t.id where g.is_gold=0 and h.is_hybrid=0 and t.is_transformative=0) tmp on p.id=tmp.id; -ANALYZE TABLE indi_pub_in_subscribed COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_in_subscribed COMPUTE STATISTICS; -create table if not exists indi_result_with_pid as +create table if not exists ${stats_db_name}.indi_result_with_pid as select distinct p.id, coalesce(result_with_pid, 0) as result_with_pid -from result p +from ${stats_db_name}.result p left outer join ( select p.id, 1 as result_with_pid - from result_pids p) tmp + from 
${stats_db_name}.result_pids p) tmp on p.id= tmp.id; -ANALYZE TABLE indi_result_with_pid COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_result_with_pid COMPUTE STATISTICS; -CREATE TEMPORARY TABLE pub_fos_totals as -select rf.id, count(distinct lvl3) totals from result_fos rf +CREATE TEMPORARY TABLE ${stats_db_name}.pub_fos_totals as +select rf.id, count(distinct lvl3) totals from ${stats_db_name}.result_fos rf group by rf.id; -create table if not exists indi_pub_interdisciplinarity as +create table if not exists ${stats_db_name}.indi_pub_interdisciplinarity as select distinct p.id as id, coalesce(is_interdisciplinary, 0) as is_interdisciplinary -from pub_fos_totals p +from ${stats_db_name}.pub_fos_totals p left outer join ( -select pub_fos_totals.id, 1 as is_interdisciplinary from pub_fos_totals +select pub_fos_totals.id, 1 as is_interdisciplinary from ${stats_db_name}.pub_fos_totals where totals>1) tmp on p.id=tmp.id; -drop table pub_fos_totals purge; +drop table ${stats_db_name}.pub_fos_totals purge; -ANALYZE TABLE indi_pub_interdisciplinarity COMPUTE STATISTICS; +--ANALYZE TABLE ${stats_db_name}.indi_pub_interdisciplinarity COMPUTE STATISTICS; -create table if not exists indi_pub_bronze_oa stored as parquet as +create table if not exists ${stats_db_name}.indi_pub_bronze_oa stored as parquet as select distinct p.id, coalesce(is_bronze_oa,0) as is_bronze_oa -from publication p +from ${stats_db_name}.publication p left outer join -(select p.id, 1 as is_bronze_oa from publication p -join indi_result_has_cc_licence cc on cc.id=p.id -join indi_pub_gold_oa ga on ga.id=p.id +(select p.id, 1 as is_bronze_oa from ${stats_db_name}.publication p +join ${stats_db_name}.indi_result_has_cc_licence cc on cc.id=p.id +join ${stats_db_name}.indi_pub_gold_oa ga on ga.id=p.id where cc.has_cc_license=0 and ga.is_gold=0) tmp on tmp.id=p.id; --- create table if not exists indi_pub_bronze_oa stored as parquet as --- WITH hybrid_oa AS ( --- SELECT issn_l, journal_is_in_doaj, 
journal_is_oa, issn_print as issn --- FROM STATS_EXT.plan_s_jn --- WHERE issn_print != "" --- UNION ALL --- SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn --- FROM STATS_EXT.plan_s_jn --- WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)), --- issn AS ( --- SELECT * --- FROM ( --- SELECT id, issn_printed as issn --- FROM datasource --- WHERE issn_printed IS NOT NULL --- UNION ALL --- SELECT id,issn_online as issn --- FROM datasource --- WHERE issn_online IS NOT NULL ) as issn --- WHERE LENGTH(issn) > 7) ---SELECT DISTINCT pd.id, coalesce(is_bronze_oa, 0) as is_bronze_oa ---FROM publication_datasources pd --- LEFT OUTER JOIN ( --- SELECT pd.id, 1 as is_bronze_oa from publication_datasources pd --- JOIN datasource d on d.id=pd.datasource --- JOIN issn on issn.id=pd.datasource --- JOIN hybrid_oa ON issn.issn = hybrid_oa.issn --- JOIN indi_result_has_cc_licence cc on pd.id=cc.id --- JOIN indi_pub_gold_oa ga on pd.id=ga.id --- JOIN indi_pub_hybrid_oa_with_cc hy on hy.id=pd.id --- where cc.has_cc_license=0 and ga.is_gold=0 and hy.is_hybrid_oa=0) tmp on pd.id=tmp.id; - -ANALYZE TABLE indi_pub_bronze_oa COMPUTE STATISTICS; \ No newline at end of file +--ANALYZE TABLE ${stats_db_name}.indi_pub_bronze_oa COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 9744d5aae..3eeb792c7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -90,83 +90,83 @@ create view if not exists TARGET.totalresearchersft as select * from SOURCE.tota create view if not 
exists TARGET.hrrst as select * from SOURCE.hrrst; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS; create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_citations_oc COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_citations_oc COMPUTE STATISTICS; create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS; create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; create table TARGET.result_datasources stored as parquet as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE 
TARGET.result_datasources COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS; create table TARGET.result_fundercount stored as parquet as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; create table TARGET.result_gold stored as parquet as select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; create table TARGET.result_greenoa stored as parquet as select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized; -ANALYZE TABLE TARGET.licenses_normalized COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.licenses_normalized COMPUTE STATISTICS; create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_oids COMPUTE 
STATISTICS; create table TARGET.result_organization stored as parquet as select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; create table TARGET.result_peerreviewed stored as parquet as select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; create table TARGET.result_pids stored as parquet as select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; create table TARGET.result_projectcount stored as parquet as select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; create table TARGET.result_projects stored as parquet as select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; create table TARGET.result_refereed stored as parquet as select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; create table TARGET.result_sources stored as parquet as select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_sources 
COMPUTE STATISTICS; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; drop view TARGET.foo1; drop view TARGET.foo2; -ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; -- datasources create view if not exists TARGET.datasource as select * from SOURCE.datasource; @@ -175,7 +175,7 @@ create view if not exists TARGET.datasource_organizations as select * from SOURC create view if not exists TARGET.datasource_sources as select * from SOURCE.datasource_sources; create table TARGET.datasource_results stored as parquet as select id as result, datasource as id from TARGET.result_datasources; -ANALYZE TABLE TARGET.datasource_results COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.datasource_results COMPUTE STATISTICS; -- organizations create view if not exists 
TARGET.organization as select * from SOURCE.organization; @@ -193,28 +193,28 @@ create view if not exists TARGET.project_classification as select * from SOURCE. create view if not exists TARGET.project_organization_contribution as select * from SOURCE.project_organization_contribution; create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; -ANALYZE TABLE TARGET.project_results COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.project_results COMPUTE STATISTICS; -- indicators -- Sprint 1 ---- create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; -- Sprint 2 ---- create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); 
-ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; ---- Sprint 3 ---- create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; create view TARGET.indi_result_org_collab as select * from SOURCE.indi_result_org_collab; create view TARGET.indi_result_org_country_collab as select * from SOURCE.indi_result_org_country_collab; create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org; @@ -223,32 +223,32 @@ create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funde create view TARGET.indi_result_country_collab as select * from SOURCE.indi_result_country_collab; ---- Sprint 4 ---- create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; create table TARGET.indi_pub_in_transformative stored as parquet as select * 
from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; ---- Sprint 5 ---- create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; ---- Sprint 6 ---- create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 
1 from TARGET.result r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; ---- Sprint 7 ---- create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; create view TARGET.indi_org_fairness as select * from SOURCE.indi_org_fairness; create view TARGET.indi_org_fairness_pub_pr as select * from SOURCE.indi_org_fairness_pub_pr; create view TARGET.indi_org_fairness_pub_year as select * from SOURCE.indi_org_fairness_pub_year; @@ -259,12 +259,14 @@ create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable; create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; create view TARGET.indi_org_openess_year as select * from 
SOURCE.indi_org_openess_year; create table TARGET.indi_pub_has_preprint stored as parquet as select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; +--ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; +create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_apc_affiliations COMPUTE STATISTICS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql new file mode 100644 index 000000000..a59791084 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql @@ -0,0 +1,276 @@ +drop database if exists TARGET cascade; +create database if not exists TARGET; + +create view if not exists TARGET.category as select * from SOURCE.category; +create view if not exists TARGET.concept as select * from SOURCE.concept; +create view if not exists TARGET.context as select * from SOURCE.context; +create view if not exists TARGET.country as select * from SOURCE.country; +create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp; +create view if not exists TARGET.creation_date as select * from SOURCE.creation_date; +create view if not exists TARGET.funder as select * from SOURCE.funder; +create view if not exists TARGET.fundref as select * from SOURCE.fundref; +create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture; +create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure; +create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents; +create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; +create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; +create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; + +create table TARGET.result stored as parquet as + select distinct * from ( + select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) + union all + select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) + union all + select * from SOURCE.result r where exists (select 1 from 
SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( + 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC" + 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council + 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ?? + 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University + 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade + 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki + 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho + 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid + 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen + 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens + -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot + 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University + 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark + 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin + 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt + 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven + 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape + 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute + 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University + 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg + 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII) + 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr + 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw + 
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly + 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete + 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus + 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras + 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki + 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank + 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech + 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University + 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona + 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University + 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia + 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University + 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje + 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan + 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork + 'openorgs____::38d7097854736583dde879d12dacafca', -- Brown University + 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech + 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town + 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin + 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology + 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba + 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili + 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University + 'openorgs____::3cff625a4370d51e08624cc586138b2f', -- IMT Atlantique + 'openorgs____::c0b262bd6eab819e4c994914f9c010e2', -- 
National Institute of Geophysics and Volcanology + 'openorgs____::1624ff7c01bb641b91f4518539a0c28a', -- Vrije Universiteit Amsterdam + 'openorgs____::4d4051b56708688235252f1d8fddb8c1', --Iscte - Instituto Universitário de Lisboa + 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab' -- Universidade Católica Portuguesa + ) )) foo; + +--ANALYZE TABLE TARGET.result COMPUTE STATISTICS; + +create view if not exists TARGET.category as select * from SOURCE.category; +create view if not exists TARGET.concept as select * from SOURCE.concept; +create view if not exists TARGET.context as select * from SOURCE.context; +create view if not exists TARGET.country as select * from SOURCE.country; +create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp; +create view if not exists TARGET.creation_date as select * from SOURCE.creation_date; +create view if not exists TARGET.funder as select * from SOURCE.funder; +create view if not exists TARGET.fundref as select * from SOURCE.fundref; +create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture; +create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure; +create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents; +create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; +create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; +create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; + +create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; + +create table TARGET.result_references_oc stored as parquet as select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_references_oc COMPUTE 
STATISTICS; + +create table TARGET.result_citations_oc stored as parquet as select * from SOURCE.result_citations_oc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_citations_oc COMPUTE STATISTICS; + +create table TARGET.result_classifications stored as parquet as select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS; + +create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; + +create table TARGET.result_concepts stored as parquet as select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; + +create table TARGET.result_datasources stored as parquet as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS; + +create table TARGET.result_fundercount stored as parquet as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; + +create table TARGET.result_gold stored as parquet as select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; + +create table TARGET.result_greenoa stored as parquet as select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; + +create table TARGET.result_languages stored as parquet as select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result r 
where r.id=orig.id); +--ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; + +create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; + +create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized; +--ANALYZE TABLE TARGET.licenses_normalized COMPUTE STATISTICS; + +create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS; + +create table TARGET.result_organization stored as parquet as select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; + +create table TARGET.result_peerreviewed stored as parquet as select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; + +create table TARGET.result_pids stored as parquet as select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; + +create table TARGET.result_projectcount stored as parquet as select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; + +create table TARGET.result_projects stored as parquet as select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; + +create table TARGET.result_refereed stored as parquet as select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result r 
where r.id=orig.id); +--ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; + +create table TARGET.result_sources stored as parquet as select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS; + +create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; + +create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; + +create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; + +create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); +create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); +create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; +drop view TARGET.foo1; +drop view TARGET.foo2; +--ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; + +-- datasources +create view if not exists TARGET.datasource as select * from SOURCE.datasource; +create view if not exists TARGET.datasource_oids as select * from SOURCE.datasource_oids; +create view if not exists TARGET.datasource_organizations as select * from SOURCE.datasource_organizations; +create view if not exists TARGET.datasource_sources as select * from SOURCE.datasource_sources; + +create table TARGET.datasource_results stored as parquet as select id as result, datasource as id from TARGET.result_datasources; 
+--ANALYZE TABLE TARGET.datasource_results COMPUTE STATISTICS; + +-- organizations +create view if not exists TARGET.organization as select * from SOURCE.organization; +create view if not exists TARGET.organization_datasources as select * from SOURCE.organization_datasources; +create view if not exists TARGET.organization_pids as select * from SOURCE.organization_pids; +create view if not exists TARGET.organization_projects as select * from SOURCE.organization_projects; +create view if not exists TARGET.organization_sources as select * from SOURCE.organization_sources; + +-- projects +create view if not exists TARGET.project as select * from SOURCE.project; +create view if not exists TARGET.project_oids as select * from SOURCE.project_oids; +create view if not exists TARGET.project_organizations as select * from SOURCE.project_organizations; +create view if not exists TARGET.project_resultcount as select * from SOURCE.project_resultcount; +create view if not exists TARGET.project_classification as select * from SOURCE.project_classification; +create view if not exists TARGET.project_organization_contribution as select * from SOURCE.project_organization_contribution; + +create table TARGET.project_results stored as parquet as select id as result, project as id from TARGET.result_projects; +--ANALYZE TABLE TARGET.project_results COMPUTE STATISTICS; + +-- indicators +-- Sprint 1 ---- +create table TARGET.indi_pub_green_oa stored as parquet as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; +create table TARGET.indi_pub_grey_lit stored as parquet as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; +create table TARGET.indi_pub_doi_from_crossref stored as parquet as select * from SOURCE.indi_pub_doi_from_crossref orig where 
exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; +-- Sprint 2 ---- +create table TARGET.indi_result_has_cc_licence stored as parquet as select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; +create table TARGET.indi_result_has_cc_licence_url stored as parquet as select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; +create table TARGET.indi_pub_has_abstract stored as parquet as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; +create table TARGET.indi_result_with_orcid stored as parquet as select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; +---- Sprint 3 ---- +create table TARGET.indi_funded_result_with_fundref stored as parquet as select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; +create view TARGET.indi_result_org_collab as select * from SOURCE.indi_result_org_collab; +create view TARGET.indi_result_org_country_collab as select * from SOURCE.indi_result_org_country_collab; +create view TARGET.indi_project_collab_org as select * from SOURCE.indi_project_collab_org; +create view TARGET.indi_project_collab_org_country as select * from SOURCE.indi_project_collab_org_country; +create view TARGET.indi_funder_country_collab as select * from SOURCE.indi_funder_country_collab; +create view 
TARGET.indi_result_country_collab as select * from SOURCE.indi_result_country_collab; +---- Sprint 4 ---- +create table TARGET.indi_pub_diamond stored as parquet as select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; +create table TARGET.indi_pub_in_transformative stored as parquet as select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; +create table TARGET.indi_pub_closed_other_open stored as parquet as select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; +---- Sprint 5 ---- +create table TARGET.indi_result_no_of_copies stored as parquet as select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; +---- Sprint 6 ---- +create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; +create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; +create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +--ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; +create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource 
orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +--ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; +create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +--ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; +create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +--ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; +---- Sprint 7 ---- +create table TARGET.indi_pub_gold_oa stored as parquet as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; +create table TARGET.indi_pub_hybrid stored as parquet as select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; +create view TARGET.indi_org_fairness as select * from SOURCE.indi_org_fairness; +create view TARGET.indi_org_fairness_pub_pr as select * from SOURCE.indi_org_fairness_pub_pr; +create view TARGET.indi_org_fairness_pub_year as select * from SOURCE.indi_org_fairness_pub_year; +create view TARGET.indi_org_fairness_pub as select * from SOURCE.indi_org_fairness_pub; +create view TARGET.indi_org_fairness_year as select * from SOURCE.indi_org_fairness_year; +create view TARGET.indi_org_findable_year as select * from SOURCE.indi_org_findable_year; +create view TARGET.indi_org_findable as select * from SOURCE.indi_org_findable; +create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess; +create view TARGET.indi_org_openess_year as select * from SOURCE.indi_org_openess_year; +create table 
TARGET.indi_pub_has_preprint stored as parquet as select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; +create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; +create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; +create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; +create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; +create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id); +--ANALYZE TABLE TARGET.result_apc_affiliations COMPUTE STATISTICS; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql index 92b40405d..9a9407c2d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql @@ -12,4 +12,4 @@ create table TARGET.result stored as parquet as -- join SOURCE.result where rc.id=r.id and conc.category like CONTEXT) ) foo; -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file +--ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql index ef6d08d79..bad18efde 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql @@ -12,4 +12,4 @@ create table TARGET.result stored as parquet as -- join SOURCE.result where rc.id=r.id and conc.category not in (CONTEXTS)) ) foo; -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file +--ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql index 8d8739c74..b8d3c0242 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql @@ -6,4 +6,4 @@ create table TARGET.result stored as parquet as select * from 
SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) ) foo; -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file +--ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql index 442e623cd..1f75c3cd1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql @@ -42,7 +42,7 @@ create table TARGET.result stored as parquet as 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork - 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University + 'openorgs____::38d7097854736583dde879d12dacafca', -- Brown University 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin @@ -52,7 +52,10 @@ create table TARGET.result stored as parquet as 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University 'openorgs____::3cff625a4370d51e08624cc586138b2f', -- IMT Atlantique 'openorgs____::c0b262bd6eab819e4c994914f9c010e2', -- National Institute of Geophysics and Volcanology - 'openorgs____::1624ff7c01bb641b91f4518539a0c28a' -- Vrije Universiteit Amsterdam + 
'openorgs____::1624ff7c01bb641b91f4518539a0c28a', -- Vrije Universiteit Amsterdam + 'openorgs____::4d4051b56708688235252f1d8fddb8c1', --Iscte - Instituto Universitário de Lisboa + 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab' -- Universidade Católica Portuguesa + ))) foo; -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file +--ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index 2d7d572b3..b7e421813 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -8,7 +8,7 @@ from ${stats_db_name}.result r group by rl.id ) rln on rln.id=r.id; -ANALYZE TABLE ${observatory_db_name}.result_cc_licence COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_cc_licence COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_country stored as parquet as select @@ -39,7 +39,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_country COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_affiliated_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_year stored as parquet as select @@ -70,7 +70,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when 
r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_year COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_affiliated_year COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as select @@ -101,7 +101,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_year_country COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_affiliated_year_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as select @@ -134,7 +134,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_datasource COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_affiliated_datasource COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as select @@ -167,7 +167,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_datasource_country COMPUTE STATISTICS; +--ANALYZE TABLE 
${observatory_db_name}.result_affiliated_datasource_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_organization stored as parquet as select @@ -198,7 +198,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_organization COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_affiliated_organization COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as select @@ -229,7 +229,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_organization_country COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_affiliated_organization_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_funder stored as parquet as select @@ -262,7 +262,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_funder COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_affiliated_funder COMPUTE STATISTICS; create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as select @@ -295,7 +295,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when 
r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_affiliated_funder_country COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_affiliated_funder_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_country stored as parquet as select @@ -328,7 +328,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_deposited_country COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_deposited_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_year stored as parquet as select @@ -361,7 +361,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; -ANALYZE TABLE ${observatory_db_name}.result_deposited_year COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_deposited_year COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_year_country stored as parquet as select @@ -394,7 +394,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_deposited_year_country COMPUTE STATISTICS; +--ANALYZE TABLE 
${observatory_db_name}.result_deposited_year_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_datasource stored as parquet as select @@ -427,7 +427,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; -ANALYZE TABLE ${observatory_db_name}.result_deposited_datasource COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_deposited_datasource COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as select @@ -460,7 +460,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_deposited_datasource_country COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_deposited_datasource_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_organization stored as parquet as select @@ -493,7 +493,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; -ANALYZE TABLE ${observatory_db_name}.result_deposited_organization COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_deposited_organization COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as select @@ -526,7 +526,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode 
in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_deposited_organization_country COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_deposited_organization_country COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_funder stored as parquet as select @@ -561,7 +561,7 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; -ANALYZE TABLE ${observatory_db_name}.result_deposited_funder COMPUTE STATISTICS; +--ANALYZE TABLE ${observatory_db_name}.result_deposited_funder COMPUTE STATISTICS; create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as select @@ -596,4 +596,4 @@ group by r.green, r.gold, case when rl.type is not null then true else false end case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; -ANALYZE TABLE ${observatory_db_name}.result_deposited_funder_country COMPUTE STATISTICS; \ No newline at end of file +--ANALYZE TABLE ${observatory_db_name}.result_deposited_funder_country COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 2ab50fb29..c03520e48 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -317,15 +317,12 @@ - - ${jobTracker} - ${nameNode} - indicators.sh - ${stats_db_name} - ${external_stats_db_name} - ${wf:appPath()}/scripts/step16-createIndicatorsTables.sql - indicators.sh - + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + external_stats_db_name=${external_stats_db_name} + @@ -378,6 +375,7 @@ ${wf:appPath()}/scripts/step20-createMonitorDB_institutions.sql ${wf:appPath()}/scripts/step20-createMonitorDB_RIs.sql ${wf:appPath()}/scripts/step20-createMonitorDB_RIs_tail.sql + ${wf:appPath()}/scripts/step20-createMonitorDBAll.sql monitor.sh @@ -469,7 +467,7 @@ ${usage_stats_db_shadow_name} finalizeImpalaCluster.sh - + From be4856ef35401dc7a6e969763839254e645456fb Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Mon, 17 Jul 2023 15:33:58 +0300 Subject: [PATCH 02/57] Update step15.sql --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 75e8b001b..d1cbde438 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -41,8 +41,6 @@ cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.va from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids where measures_ids.id!='views' and measures_ids.id!='downloads'; -ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.result_apc_affiliations STORED AS PARQUET as select distinct substr(rel.target,4) id, substr(rel.source,4) organization, 
o.legalname.value name, cast(rel.properties[0].value as double) apc_amount, From 6cc7d8ca7b04f8aff859fda0ff04294b830608b5 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 24 Aug 2023 21:48:07 +0200 Subject: [PATCH 03/57] GroupEntities and DispatchEntites are now merged in GroupEntitiesSparkJob --- .../oa/merge/DispatchEntitiesSparkJob.java | 98 -------- .../dhp/oa/merge/GroupEntitiesSparkJob.java | 221 ++++++++---------- .../merge/dispatch_entities_parameters.json | 26 --- .../group_graph_entities_parameters.json | 16 +- .../dedup/consistency/oozie_app/workflow.xml | 26 +-- .../dhp/oa/graph/group/oozie_app/workflow.xml | 25 +- .../group/GroupEntitiesSparkJobTest.java | 201 ++++++++-------- 7 files changed, 203 insertions(+), 410 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java delete mode 100644 dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java deleted file mode 100644 index 4d2ccb178..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java +++ /dev/null @@ -1,98 +0,0 @@ - -package eu.dnetlib.dhp.oa.merge; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.util.Objects; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.common.ModelSupport; - -public class 
DispatchEntitiesSparkJob { - - private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesSparkJob.class); - - public static void main(String[] args) throws Exception { - - String jsonConfiguration = IOUtils - .toString( - Objects - .requireNonNull( - DispatchEntitiesSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json"))); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - String inputPath = parser.get("inputPath"); - log.info("inputPath: {}", inputPath); - - String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - boolean filterInvisible = Boolean.valueOf(parser.get("filterInvisible")); - log.info("filterInvisible: {}", filterInvisible); - - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); - dispatchEntities(spark, inputPath, outputPath, filterInvisible); - }); - } - - private static void dispatchEntities( - SparkSession spark, - String inputPath, - String outputPath, - boolean filterInvisible) { - - Dataset df = spark.read().textFile(inputPath); - - ModelSupport.oafTypes.entrySet().parallelStream().forEach(entry -> { - String entityType = entry.getKey(); - Class clazz = entry.getValue(); - - if (!entityType.equalsIgnoreCase("relation")) { - Dataset entityDF = spark - .read() - .schema(Encoders.bean(clazz).schema()) - .json( - df - .filter((FilterFunction) s -> s.startsWith(clazz.getName())) - .map( - (MapFunction) s -> StringUtils.substringAfter(s, "|"), - Encoders.STRING())); - - if (filterInvisible) { - entityDF = entityDF.filter("dataInfo.invisible != 
true"); - } - - entityDF - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/" + entityType); - } - }); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index e652bd5b6..87510c108 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -2,36 +2,28 @@ package eu.dnetlib.dhp.oa.merge; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; +import static org.apache.spark.sql.functions.col; +import static org.apache.spark.sql.functions.when; -import java.io.IOException; -import java.util.List; -import java.util.Objects; +import java.util.Map; import java.util.Optional; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ForkJoinPool; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.ReduceFunction; import org.apache.spark.sql.*; -import org.apache.spark.sql.expressions.Aggregator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.jayway.jsonpath.Configuration; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; -import com.jayway.jsonpath.Option; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.EntityType; import 
eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; @@ -39,13 +31,9 @@ import scala.Tuple2; * Groups the graph content by entity identifier to ensure ID uniqueness */ public class GroupEntitiesSparkJob { - private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); - private static final String ID_JPATH = "$.id"; - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private static final Encoder OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class); public static void main(String[] args) throws Exception { @@ -66,9 +54,15 @@ public class GroupEntitiesSparkJob { String graphInputPath = parser.get("graphInputPath"); log.info("graphInputPath: {}", graphInputPath); + String checkpointPath = parser.get("checkpointPath"); + log.info("checkpointPath: {}", checkpointPath); + String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + boolean filterInvisible = Boolean.valueOf(parser.get("filterInvisible")); + log.info("filterInvisible: {}", filterInvisible); + SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ModelSupport.getOafModelClasses()); @@ -78,126 +72,95 @@ public class GroupEntitiesSparkJob { isSparkSessionManaged, spark -> { HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); - groupEntities(spark, graphInputPath, outputPath); + groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible); }); } private static void groupEntities( SparkSession spark, String inputPath, - String outputPath) { + String checkpointPath, + String outputPath, + boolean filterInvisible) { - final TypedColumn aggregator = new GroupingAggregator().toColumn(); - final 
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - spark - .read() - .textFile(toSeq(listEntityPaths(inputPath, sc))) - .map((MapFunction) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class)) - .filter((FilterFunction) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e))) - .groupByKey((MapFunction) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING()) - .agg(aggregator) + Dataset allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC); + + for (Map.Entry e : ModelSupport.entityTypes.entrySet()) { + String entity = e.getKey().name(); + Class entityClass = e.getValue(); + String entityInputPath = inputPath + "/" + entity; + + if (!HdfsSupport.exists(entityInputPath, spark.sparkContext().hadoopConfiguration())) { + continue; + } + + allEntities = allEntities + .union( + ((Dataset) spark + .read() + .schema(Encoders.bean(entityClass).schema()) + .json(entityInputPath) + .filter("length(id) > 0") + .as(Encoders.bean(entityClass))) + .map((MapFunction) r -> r, OAFENTITY_KRYO_ENC)); + } + + Dataset groupedEntities = allEntities + .groupByKey((MapFunction) OafEntity::getId, Encoders.STRING()) + .reduceGroups((ReduceFunction) (b, a) -> OafMapperUtils.mergeEntities(b, a)) .map( - (MapFunction, String>) t -> t._2().getClass().getName() + - "|" + OBJECT_MAPPER.writeValueAsString(t._2()), - Encoders.STRING()) + (MapFunction, Tuple2>) t -> new Tuple2( + t._2().getClass().getName(), t._2()), + Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC)); + + // pivot on "_1" (classname of the entity) + // created columns containing only entities of the same class + for (Map.Entry e : ModelSupport.entityTypes.entrySet()) { + String entity = e.getKey().name(); + Class entityClass = e.getValue(); + + groupedEntities = groupedEntities + .withColumn( + entity, + when(col("_1").equalTo(entityClass.getName()), col("_2"))); + } + + groupedEntities + .drop("_1", "_2") .write() - .option("compression", "gzip") .mode(SaveMode.Overwrite) - 
.text(outputPath); - } + .option("compression", "gzip") + .save(checkpointPath); - public static class GroupingAggregator extends Aggregator { + ForkJoinPool parPool = new ForkJoinPool(ModelSupport.entityTypes.size()); - @Override - public OafEntity zero() { - return null; - } - - @Override - public OafEntity reduce(OafEntity b, OafEntity a) { - return mergeAndGet(b, a); - } - - private OafEntity mergeAndGet(OafEntity b, OafEntity a) { - if (Objects.nonNull(a) && Objects.nonNull(b)) { - return OafMapperUtils.mergeEntities(b, a); - } - return Objects.isNull(a) ? b : a; - } - - @Override - public OafEntity merge(OafEntity b, OafEntity a) { - return mergeAndGet(b, a); - } - - @Override - public OafEntity finish(OafEntity j) { - return j; - } - - @Override - public Encoder bufferEncoder() { - return Encoders.kryo(OafEntity.class); - } - - @Override - public Encoder outputEncoder() { - return Encoders.kryo(OafEntity.class); - } - - } - - private static OafEntity parseOaf(String s) { - - DocumentContext dc = JsonPath - .parse(s, Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS)); - final String id = dc.read(ID_JPATH); - if (StringUtils.isNotBlank(id)) { - - String prefix = StringUtils.substringBefore(id, "|"); - switch (prefix) { - case "10": - return parse(s, Datasource.class); - case "20": - return parse(s, Organization.class); - case "40": - return parse(s, Project.class); - case "50": - String resultType = dc.read("$.resulttype.classid"); - switch (resultType) { - case "publication": - return parse(s, Publication.class); - case "dataset": - return parse(s, eu.dnetlib.dhp.schema.oaf.Dataset.class); - case "software": - return parse(s, Software.class); - case "other": - return parse(s, OtherResearchProduct.class); - default: - throw new IllegalArgumentException(String.format("invalid resultType: '%s'", resultType)); - } - default: - throw new IllegalArgumentException(String.format("invalid id prefix: '%s'", prefix)); - } - } else { - throw new 
IllegalArgumentException(String.format("invalid oaf: '%s'", s)); - } - } - - private static OafEntity parse(String s, Class clazz) { - try { - return OBJECT_MAPPER.readValue(s, clazz); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } - - private static List listEntityPaths(String inputPath, JavaSparkContext sc) { - return HdfsSupport - .listFiles(inputPath, sc.hadoopConfiguration()) + ModelSupport.entityTypes + .entrySet() .stream() - .filter(f -> !f.toLowerCase().contains("relation")) - .collect(Collectors.toList()); - } + .map(e -> parPool.submit(() -> { + String entity = e.getKey().name(); + Class entityClass = e.getValue(); + spark + .read() + .load(checkpointPath) + .select(col(entity).as("value")) + .filter("value IS NOT NULL") + .as(OAFENTITY_KRYO_ENC) + .map((MapFunction) r -> r, (Encoder) Encoders.bean(entityClass)) + .filter(filterInvisible ? "dataInfo.invisible != TRUE" : "TRUE") + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + entity); + })) + .collect(Collectors.toList()) + .forEach(t -> { + try { + t.get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + } } diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json deleted file mode 100644 index 60f11ac84..000000000 --- a/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after job execution", - "paramRequired": false - }, - { - "paramName": "i", - "paramLongName": "inputPath", - "paramDescription": "the source path", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "path of the output 
graph", - "paramRequired": true - }, - { - "paramName": "fi", - "paramLongName": "filterInvisible", - "paramDescription": "if true filters out invisible entities", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json index e65acb3c4..58e3ca711 100644 --- a/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json +++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json @@ -8,13 +8,25 @@ { "paramName": "gin", "paramLongName": "graphInputPath", - "paramDescription": "the graph root path", + "paramDescription": "the input graph root path", + "paramRequired": true + }, + { + "paramName": "cp", + "paramLongName": "checkpointPath", + "paramDescription": "checkpoint directory", "paramRequired": true }, { "paramName": "out", "paramLongName": "outputPath", - "paramDescription": "the output merged graph root path", + "paramDescription": "the output graph root path", + "paramRequired": true + }, + { + "paramName": "fi", + "paramLongName": "filterInvisible", + "paramDescription": "if true filters out invisible entities", "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml index b724e5d0b..3640ee6ca 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml @@ -152,31 +152,7 @@ --conf spark.sql.shuffle.partitions=15000 --graphInputPath${graphBasePath} - 
--outputPath${workingPath}/grouped_entities - - - - - - - - yarn - cluster - Dispatch grouped entitities - eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --inputPath${workingPath}/grouped_entities + --checkpointPath${workingPath}/grouped_entities --outputPath${graphOutputPath} --filterInvisible${filterInvisible} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml index 219dc7331..190788c9d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml @@ -96,30 +96,7 @@ --conf spark.sql.shuffle.partitions=15000 --graphInputPath${graphBasePath} - --outputPath${workingPath}/grouped_entities - - - - - - - - yarn - cluster - Dispatch grouped entities - eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --inputPath${workingPath}/grouped_entities + --checkpointPath${workingPath}/grouped_entities --outputPath${graphOutputPath} --filterInvisible${filterInvisible} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java index 61baf80dc..b878e778e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -1,16 +1,15 @@ package eu.dnetlib.dhp.oa.graph.group; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; - +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.DHPUtils; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; @@ -19,118 +18,108 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import 
java.nio.file.Path; +import java.nio.file.Paths; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob; -import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.utils.DHPUtils; +import static org.junit.jupiter.api.Assertions.assertEquals; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class GroupEntitiesSparkJobTest { - private static SparkSession spark; + private static SparkSession spark; - private static ObjectMapper mapper = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private static ObjectMapper mapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - private static Path workingDir; - private Path dataInputPath; + private static Path workingDir; + private Path dataInputPath; - private Path groupEntityPath; - private Path dispatchEntityPath; + private Path checkpointPath; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + private Path outputPath; - SparkConf conf = new SparkConf(); - conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); - conf.setMaster("local"); - conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - spark = SparkSession.builder().config(conf).getOrCreate(); - } + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); - @BeforeEach - public void beforeEach() throws IOException, URISyntaxException { - dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); - groupEntityPath = workingDir.resolve("grouped_entity"); - dispatchEntityPath = 
workingDir.resolve("dispatched_entity"); - } + SparkConf conf = new SparkConf(); + conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); + conf.setMaster("local"); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + spark = SparkSession.builder().config(conf).getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - spark.stop(); - FileUtils.deleteDirectory(workingDir.toFile()); - } + @BeforeEach + public void beforeEach() throws IOException, URISyntaxException { + dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + checkpointPath = workingDir.resolve("grouped_entity"); + outputPath = workingDir.resolve("dispatched_entity"); + } - @Test - @Order(1) - void testGroupEntities() throws Exception { - GroupEntitiesSparkJob.main(new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-graphInputPath", - dataInputPath.toString(), - "-outputPath", - groupEntityPath.toString() - }); + @AfterAll + public static void afterAll() throws IOException { + spark.stop(); + FileUtils.deleteDirectory(workingDir.toFile()); + } - Dataset output = spark - .read() - .textFile(groupEntityPath.toString()) - .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) - .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + @Test + @Order(1) + void testGroupEntities() throws Exception { + GroupEntitiesSparkJob.main(new String[]{ + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-graphInputPath", + dataInputPath.toString(), + "-checkpointPath", + checkpointPath.toString(), + "-outputPath", + outputPath.toString(), + "-filterInvisible", + Boolean.FALSE.toString() + }); - assertEquals( - 1, - output - .filter( - (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" - .equals(r.getId()) && - 
r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) - .count()); - } + Dataset checkpointTable = spark + .read() + .load(checkpointPath.toString()) + .selectExpr("COALESCE(*)") + .as(Encoders.kryo(OafEntity.class)); - @Test - @Order(2) - void testDispatchEntities() throws Exception { - DispatchEntitiesSparkJob.main(new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-inputPath", - groupEntityPath.toString(), - "-outputPath", - dispatchEntityPath.resolve(".").toString(), - "-filterInvisible", - Boolean.TRUE.toString() - }); - Dataset output = spark - .read() - .textFile( - DHPUtils - .toSeq( - HdfsSupport - .listFiles(dispatchEntityPath.toString(), spark.sparkContext().hadoopConfiguration()))) - .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + assertEquals( + 1, + checkpointTable + .filter( + (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" + .equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) + .count()); - assertEquals(3, output.count()); - assertEquals( - 2, - output - .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) - .filter((FilterFunction) s -> s.equals("publication")) - .count()); - assertEquals( - 1, - output - .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) - .filter((FilterFunction) s -> s.equals("dataset")) - .count()); - } -} + + Dataset output = spark + .read() + .textFile( + DHPUtils + .toSeq( + HdfsSupport + .listFiles(outputPath.toString(), spark.sparkContext().hadoopConfiguration()))) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + assertEquals(3, output.count()); + assertEquals( + 2, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("publication")) + .count()); + assertEquals( + 1, + output + 
.map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("dataset")) + .count()); + } +} \ No newline at end of file From 964c2f553e43438cedbfe44d2b1ae5e4d4d3d4f6 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 1 Sep 2023 10:57:02 +0300 Subject: [PATCH 04/57] Changes in indicators step, monitor step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - graduatedoctorates for observatory - result_apc_affiliations table - new indicators indi_is_funder_plan_s indi_funder_fairness indi_ris_fairness indi_funder_openess indi_ris_openess indi_funder_findable indi_ris_findable indi_is_project_result_after - cast year to int in composite indicators - new institutions -- Universidade Católica Portuguesa -- Iscte - Instituto Universitário de Lisboa -- Munster Technological University -- Cardiff University -- Leibniz Institute of Ecological Urban and Regional Development --- .../dhp/oa/graph/stats/oozie_app/monitor.sh | 2 +- .../graph/stats/oozie_app/scripts/step13.sql | 0 .../graph/stats/oozie_app/scripts/step15.sql | 2 +- .../stats/oozie_app/scripts/step15_5.sql | 1 + .../scripts/step16-createIndicatorsTables.sql | 358 ++++++++++++++---- .../scripts/step20-createMonitorDB.sql | 9 + .../scripts/step20-createMonitorDBAll.sql | 18 +- .../step20-createMonitorDB_institutions.sql | 25 +- .../graph/stats/oozie_app/scripts/step5.sql | 0 .../dhp/oa/graph/stats/oozie_app/workflow.xml | 2 +- 10 files changed, 324 insertions(+), 93 deletions(-) mode change 100644 => 100755 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh mode change 100644 => 100755 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql mode change 100644 => 100755 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh old mode 100644 new mode 100755 index 014b19c6c..872456973 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh @@ -39,7 +39,7 @@ hdfs dfs -copyToLocal $9 echo "Creating monitor database" -cat step20-createMonitorDBAll.sql | sed "s/SOURCE/openaire_prod_stats_20230707/g" | sed "s/TARGET/openaire_prod_stats_monitor_20230707/g1" > foo +cat step20-createMonitorDBAll.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" > foo hive $HIVE_OPTS -f foo cat step20-createMonitorDB_funded.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql old mode 100644 new mode 100755 diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index d1cbde438..4a8f81943 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -48,4 +48,4 @@ rel.properties[1].value apc_currency from ${openaire_db_name}.relation rel join ${openaire_db_name}.organization o on o.id=rel.source join ${openaire_db_name}.result r on r.id=rel.target -where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties) > 0; +where 
rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql index f39ff2afd..615f523ce 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql @@ -35,6 +35,7 @@ create or replace view ${stats_db_name}.doctoratestudents as select * from stats create or replace view ${stats_db_name}.totalresearchers as select * from stats_ext.totalresearchers; create or replace view ${stats_db_name}.totalresearchersft as select * from stats_ext.totalresearchersft; create or replace view ${stats_db_name}.hrrst as select * from stats_ext.hrrst; +create or replace view ${stats_db_name}.graduatedoctorates as select * from stats_ext.graduatedoctorates; create table if not exists ${stats_db_name}.result_instance stored as parquet as select distinct r.* diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 57c381875..1c80f6757 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -12,8 +12,6 @@ from ${stats_db_name}.publication p or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_green_oa COMPUTE 
STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as select distinct p.id, coalesce(grey_lit, 0) as grey_lit from ${stats_db_name}.publication p @@ -25,8 +23,6 @@ from ${stats_db_name}.publication p not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_grey_lit COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref from ${stats_db_name}.publication p @@ -36,8 +32,6 @@ from ${stats_db_name}.publication p where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp on tmp.id=p.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_doi_from_crossref COMPUTE STATISTICS; - -- Sprint 2 ---- create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -47,8 +41,6 @@ left outer join (select r.id, license.type as lic from ${stats_db_name}.result r where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp on r.id= tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_result_has_cc_licence COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url from ${stats_db_name}.result r @@ -58,22 +50,16 @@ from ${stats_db_name}.result r WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_result_has_cc_licence_url COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as select distinct publication.id, cast(coalesce(abstract, true) as int) 
has_abstract from ${stats_db_name}.publication; ---ANALYZE TABLE ${stats_db_name}.indi_pub_has_abstract COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from ${stats_db_name}.result r left outer join (select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp on r.id= tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_result_with_orcid COMPUTE STATISTICS; - ---- Sprint 3 ---- create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as select distinct r.result as id, coalesce(fundref, 0) as fundref @@ -82,8 +68,6 @@ from ${stats_db_name}.project_results r where provenance='Harvested') tmp on r.result= tmp.result; ---ANALYZE TABLE ${stats_db_name}.indi_funded_result_with_fundref COMPUTE STATISTICS; - -- create table indi_result_org_collab stored as parquet as -- select o1.organization org1, o2.organization org2, count(distinct o1.id) as collaborations -- from result_organization as o1 @@ -103,8 +87,6 @@ group by o1.organization, o2.organization, o1.name, o2.name; drop table ${stats_db_name}.tmp purge; ---ANALYZE TABLE ${stats_db_name}.indi_result_org_collab COMPUTE STATISTICS; - create TEMPORARY TABLE ${stats_db_name}.tmp AS select distinct ro.organization organization, ro.id, o.name, o.country from ${stats_db_name}.result_organization ro join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; @@ -117,8 +99,6 @@ group by o1.organization, o1.id, o1.name, o2.country; drop table ${stats_db_name}.tmp purge; ---ANALYZE TABLE ${stats_db_name}.indi_result_org_country_collab COMPUTE STATISTICS; - create TEMPORARY TABLE ${stats_db_name}.tmp AS select o.id organization, o.name, ro.project as project from ${stats_db_name}.organization o join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; @@ -132,8 +112,6 @@ group by o1.name,o2.name, 
o1.organization, o2.organization; drop table ${stats_db_name}.tmp purge; ---ANALYZE TABLE ${stats_db_name}.indi_project_collab_org COMPUTE STATISTICS; - create TEMPORARY TABLE ${stats_db_name}.tmp AS select o.id organization, o.name, o.country , ro.project as project from ${stats_db_name}.organization o join ${stats_db_name}.organization_projects ro on o.id=ro.id @@ -148,8 +126,6 @@ group by o1.organization, o2.country, o1.name; drop table ${stats_db_name}.tmp purge; ---ANALYZE TABLE ${stats_db_name}.indi_project_collab_org_country COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from ${stats_db_name}.organization_projects op join ${stats_db_name}.organization o on o.id=op.id @@ -161,8 +137,6 @@ from tmp as f1 where f1.country<>f2.country group by f1.funder, f2.country, f1.country; ---ANALYZE TABLE ${stats_db_name}.indi_funder_country_collab COMPUTE STATISTICS; - create TEMPORARY TABLE ${stats_db_name}.tmp AS select distinct country, ro.id as result from ${stats_db_name}.organization o join ${stats_db_name}.result_organization ro on o.id=ro.organization @@ -177,8 +151,6 @@ group by o1.country, o2.country; drop table ${stats_db_name}.tmp purge; ---ANALYZE TABLE ${stats_db_name}.indi_result_country_collab COMPUTE STATISTICS; - ---- Sprint 4 ---- create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal @@ -190,8 +162,6 @@ from ${stats_db_name}.publication_datasources pd and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp on pd.id=tmp.id; -----ANALYZE TABLE ${stats_db_name}.indi_pub_diamond COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative from ${stats_db_name}.publication pd @@ -202,8 +172,6 
@@ from ${stats_db_name}.publication pd and ps.is_transformative_journal=true) tmp on pd.id=tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_in_transformative COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from ${stats_db_name}.result_instance ri left outer join @@ -214,14 +182,10 @@ select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_op (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp on tmp.id=ri.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_closed_other_open COMPUTE STATISTICS; - ---- Sprint 5 ---- create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; ---ANALYZE TABLE ${stats_db_name}.indi_result_no_of_copies COMPUTE STATISTICS; - ---- Sprint 6 ---- create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet as SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats @@ -239,24 +203,18 @@ where downloads>0 GROUP BY result_id, repository_id order by result_id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_downloads_datasource COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as -SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_downloads +SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us join ${stats_db_name}.publication on result_id=id where downloads>0 GROUP BY result_id, substring(us.`date`, 1,4); ---ANALYZE TABLE ${stats_db_name}.indi_pub_downloads_year COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as -SELECT result_id, substring(us.`date`, 1,4) as 
`year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us +SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us join ${stats_db_name}.publication on result_id=id where downloads>0 GROUP BY result_id, repository_id, substring(us.`date`, 1,4); ---ANALYZE TABLE ${stats_db_name}.indi_pub_downloads_datasource_year COMPUTE STATISTICS; - ---- Sprint 7 ---- create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as WITH gold_oa AS ( SELECT @@ -307,8 +265,6 @@ FROM JOIN gold_oa on issn.issn = gold_oa.issn) tmp on pd.id=tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_gold_oa COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as WITH hybrid_oa AS ( SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn @@ -340,8 +296,6 @@ FROM ${stats_db_name}.publication_datasources pd JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as WITH gold_oa AS ( SELECT issn_l, @@ -393,8 +347,6 @@ from ${stats_db_name}.publication_datasources pd where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp on pd.id=tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_hybrid COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as --return results with PIDs, and rich metadata group by organization with result_fair as @@ -413,8 +365,6 @@ select allresults.organization, result_fair.no_result_fair/allresults.no_allresu from allresults join result_fair on result_fair.organization=allresults.organization; ---ANALYZE TABLE ${stats_db_name}.indi_org_fairness COMPUTE STATISTICS; - 
CREATE TEMPORARY table ${stats_db_name}.result_fair as select ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro @@ -439,8 +389,6 @@ from ${stats_db_name}.allresults ar DROP table ${stats_db_name}.result_fair purge; DROP table ${stats_db_name}.allresults purge; ---ANALYZE TABLE ${stats_db_name}.indi_org_fairness_pub_pr COMPUTE STATISTICS; - CREATE TEMPORARY table ${stats_db_name}.result_fair as select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro join ${stats_db_name}.result p on p.id=ro.id @@ -460,8 +408,6 @@ from ${stats_db_name}.allresults DROP table ${stats_db_name}.result_fair purge; DROP table ${stats_db_name}.allresults purge; ---ANALYZE TABLE ${stats_db_name}.indi_org_fairness_pub_year COMPUTE STATISTICS; - CREATE TEMPORARY TABLE ${stats_db_name}.result_fair as select ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro @@ -484,8 +430,6 @@ on rf.organization=ar.organization; DROP table ${stats_db_name}.result_fair purge; DROP table ${stats_db_name}.allresults purge; ---ANALYZE TABLE ${stats_db_name}.indi_org_fairness_pub COMPUTE STATISTICS; - CREATE TEMPORARY TABLE ${stats_db_name}.result_fair as select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro join ${stats_db_name}.result r on r.id=ro.id @@ -507,8 +451,6 @@ create table if not exists ${stats_db_name}.indi_org_fairness_year stored as par DROP table ${stats_db_name}.result_fair purge; DROP table ${stats_db_name}.allresults purge; ---ANALYZE TABLE ${stats_db_name}.indi_org_fairness_year COMPUTE STATISTICS; - CREATE TEMPORARY TABLE ${stats_db_name}.result_with_pid as select year, ro.organization, count(distinct rp.id) no_result_with_pid from ${stats_db_name}.result_organization ro join ${stats_db_name}.result_pids rp on rp.id=ro.id @@ -530,8 
+472,6 @@ from ${stats_db_name}.allresults DROP table ${stats_db_name}.result_with_pid purge; DROP table ${stats_db_name}.allresults purge; ---ANALYZE TABLE ${stats_db_name}.indi_org_findable_year COMPUTE STATISTICS; - CREATE TEMPORARY TABLE ${stats_db_name}.result_with_pid as select ro.organization, count(distinct rp.id) no_result_with_pid from ${stats_db_name}.result_organization ro join ${stats_db_name}.result_pids rp on rp.id=ro.id @@ -553,8 +493,6 @@ from ${stats_db_name}.allresults DROP table ${stats_db_name}.result_with_pid purge; DROP table ${stats_db_name}.allresults purge; ---ANALYZE TABLE ${stats_db_name}.indi_org_findable COMPUTE STATISTICS; - CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as SELECT ro.organization, count(distinct r.id) no_oapubs FROM ${stats_db_name}.publication r join ${stats_db_name}.result_organization ro on ro.id=r.id @@ -633,8 +571,6 @@ DROP TABLE ${stats_db_name}.allpubsshare purge; DROP TABLE ${stats_db_name}.alldatasetssshare purge; DROP TABLE ${stats_db_name}.allsoftwaresshare purge; ---ANALYZE TABLE ${stats_db_name}.indi_org_openess COMPUTE STATISTICS; - CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa AS SELECT r.year, ro.organization, count(distinct r.id) no_oapubs FROM ${stats_db_name}.publication r join ${stats_db_name}.result_organization ro on ro.id=r.id @@ -690,7 +626,7 @@ select allsoftware.year, software_oa.organization, software_oa.no_oasoftware/all create table if not exists ${stats_db_name}.indi_org_openess_year stored as parquet as -select allpubsshare.year, allpubsshare.organization, +select cast(allpubsshare.year as int), allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) org_openess FROM ${stats_db_name}.allpubsshare @@ -711,8 +647,6 @@ DROP TABLE ${stats_db_name}.allpubsshare purge; DROP TABLE ${stats_db_name}.alldatasetssshare purge; DROP TABLE ${stats_db_name}.allsoftwaresshare purge; ---ANALYZE TABLE 
${stats_db_name}.indi_org_openess_year COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_has_preprint stored as parquet as select distinct p.id, coalesce(has_preprint, 0) as has_preprint from ${stats_db_name}.publication_classifications p @@ -722,8 +656,6 @@ from ${stats_db_name}.publication_classifications p where p.type='Preprint') tmp on p.id= tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_has_preprint COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_in_subscribed stored as parquet as select distinct p.id, coalesce(is_subscription, 0) as is_subscription from ${stats_db_name}.publication p @@ -735,8 +667,6 @@ from ${stats_db_name}.publication p where g.is_gold=0 and h.is_hybrid=0 and t.is_transformative=0) tmp on p.id=tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_pub_in_subscribed COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_result_with_pid as select distinct p.id, coalesce(result_with_pid, 0) as result_with_pid from ${stats_db_name}.result p @@ -745,8 +675,6 @@ from ${stats_db_name}.result p from ${stats_db_name}.result_pids p) tmp on p.id= tmp.id; ---ANALYZE TABLE ${stats_db_name}.indi_result_with_pid COMPUTE STATISTICS; - CREATE TEMPORARY TABLE ${stats_db_name}.pub_fos_totals as select rf.id, count(distinct lvl3) totals from ${stats_db_name}.result_fos rf group by rf.id; @@ -761,8 +689,6 @@ where totals>1) tmp on p.id=tmp.id; drop table ${stats_db_name}.pub_fos_totals purge; ---ANALYZE TABLE ${stats_db_name}.indi_pub_interdisciplinarity COMPUTE STATISTICS; - create table if not exists ${stats_db_name}.indi_pub_bronze_oa stored as parquet as select distinct p.id, coalesce(is_bronze_oa,0) as is_bronze_oa from ${stats_db_name}.publication p @@ -772,4 +698,280 @@ join ${stats_db_name}.indi_result_has_cc_licence cc on cc.id=p.id join ${stats_db_name}.indi_pub_gold_oa ga on ga.id=p.id where cc.has_cc_license=0 and ga.is_gold=0) tmp on tmp.id=p.id; ---ANALYZE TABLE 
${stats_db_name}.indi_pub_bronze_oa COMPUTE STATISTICS; \ No newline at end of file +CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as +select p.id project_id, acronym, r.id result_id, r.year, p.end_year +from ${stats_db_name}.project p +join ${stats_db_name}.result_projects rp on p.id=rp.project +join ${stats_db_name}.result r on r.id=rp.id +where p.end_year is NOT NULL and r.year is not null; + +create table if not exists ${stats_db_name}.indi_is_project_result_after stored as parquet as +select pry.project_id, pry.acronym, pry.result_id, +coalesce(is_project_result_after, 0) as is_project_result_after +from ${stats_db_name}.project_year_result_year pry +left outer join (select pry.project_id, pry.acronym, pry.result_id, 1 as is_project_result_after +from ${stats_db_name}.project_year_result_year pry +where pry.year>pry.end_year) tmp on pry.result_id=tmp.result_id; + +drop table ${stats_db_name}.project_year_result_year purge; + +create table if not exists ${stats_db_name}.indi_is_funder_plan_s stored as parquet as +select distinct f.id, f.name, coalesce(is_funder_plan_s, 0) as is_funder_plan_s +from ${stats_db_name}.funder f + left outer join (select id, name, 1 as is_funder_plan_s from ${stats_db_name}.funder + join stats_ext.plan_s_short on c_o_alition_s_organisation_funder=name) tmp + on f.name= tmp.name; + +--Funder Fairness + +create table if not exists ${stats_db_name}.indi_funder_fairness stored as parquet as + with result_fair as + (select p.funder funder, count(distinct rp.id) no_result_fair from ${stats_db_name}.result_projects rp + join ${stats_db_name}.result r on r.id=rp.id + join ${stats_db_name}.project p on p.id=rp.project + where (r.title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 + group by p.funder), + allresults as (select p.funder funder, count(distinct rp.id) no_allresults from ${stats_db_name}.result_projects rp + join 
${stats_db_name}.result r on r.id=rp.id + join ${stats_db_name}.project p on p.id=rp.project + where cast(year as int)>2003 + group by p.funder) +select allresults.funder, result_fair.no_result_fair/allresults.no_allresults funder_fairness +from allresults + join result_fair on result_fair.funder=allresults.funder; + +--RIs Fairness +create table if not exists ${stats_db_name}.indi_ris_fairness stored as parquet as +with result_contexts as +(select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc +join ${stats_db_name}.concept on concept.id=rc.concept +join ${stats_db_name}.category on category.id=concept.category +join ${stats_db_name}.context on context.id=category.context), +result_fair as + (select rc.ri_initiative ri_initiative, count(distinct rc.id) no_result_fair from result_contexts rc + join ${stats_db_name}.result r on r.id=rc.id + where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 + group by rc.ri_initiative), +allresults as +(select rc.ri_initiative ri_initiative, count(distinct rc.id) no_allresults from result_contexts rc + join ${stats_db_name}.result r on r.id=rc.id + where cast(year as int)>2003 + group by rc.ri_initiative) +select allresults.ri_initiative, result_fair.no_result_fair/allresults.no_allresults ris_fairness +from allresults + join result_fair on result_fair.ri_initiative=allresults.ri_initiative; + +--Funder Openess + +CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as +select p.funder funder, count(distinct rp.id) no_oapubs from ${stats_db_name}.result_projects rp +join ${stats_db_name}.project p on p.id=rp.project +join ${stats_db_name}.publication r on r.id=rp.id +join ${stats_db_name}.result_instance ri on ri.id=r.id +where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') +and cast(r.year as int)>2003 +group by p.funder; + + +CREATE TEMPORARY TABLE 
${stats_db_name}.datasets_oa as +select p.funder funder, count(distinct rp.id) no_oadatasets from ${stats_db_name}.result_projects rp +join ${stats_db_name}.project p on p.id=rp.project +join ${stats_db_name}.dataset r on r.id=rp.id +join ${stats_db_name}.result_instance ri on ri.id=r.id +where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') +and cast(r.year as int)>2003 +group by p.funder; + +CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as +select p.funder funder, count(distinct rp.id) no_oasoftware from ${stats_db_name}.result_projects rp +join ${stats_db_name}.project p on p.id=rp.project +join ${stats_db_name}.software r on r.id=rp.id +join ${stats_db_name}.result_instance ri on ri.id=r.id +where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') +and cast(r.year as int)>2003 +group by p.funder; + +CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as +select p.funder funder, count(distinct rp.id) no_allpubs from ${stats_db_name}.result_projects rp +join ${stats_db_name}.project p on p.id=rp.project +join ${stats_db_name}.publication r on r.id=rp.id +where cast(r.year as int)>2003 +group by p.funder; + +CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +select p.funder funder, count(distinct rp.id) no_alldatasets from ${stats_db_name}.result_projects rp +join ${stats_db_name}.project p on p.id=rp.project +join ${stats_db_name}.dataset r on r.id=rp.id +where cast(r.year as int)>2003 +group by p.funder; + +CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +select p.funder funder, count(distinct rp.id) no_allsoftware from ${stats_db_name}.result_projects rp +join ${stats_db_name}.project p on p.id=rp.project +join ${stats_db_name}.software r on r.id=rp.id +where cast(r.year as int)>2003 +group by p.funder; + +CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as +select pubs_oa.funder, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs + 
join ${stats_db_name}.pubs_oa on allpubs.funder=pubs_oa.funder; + +CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +select datasets_oa.funder, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d + from ${stats_db_name}.alldatasets + join ${stats_db_name}.datasets_oa on alldatasets.funder=datasets_oa.funder; + +CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +select software_oa.funder, software_oa.no_oasoftware/allsoftware.no_allsoftware s + from ${stats_db_name}.allsoftware + join ${stats_db_name}.software_oa on allsoftware.funder=software_oa.funder; + +create table if not exists ${stats_db_name}.indi_funder_openess stored as parquet as +select allpubsshare.funder, + (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) + +(case when d is null then 0 else 1 end)) + funder_openess FROM ${stats_db_name}.allpubsshare + left outer join (select funder,d from + ${stats_db_name}.alldatasetssshare) tmp1 + on tmp1.funder=allpubsshare.funder + left outer join (select funder,s from + ${stats_db_name}.allsoftwaresshare) tmp2 + on tmp2.funder=allpubsshare.funder; + +DROP TABLE ${stats_db_name}.pubs_oa purge; +DROP TABLE ${stats_db_name}.datasets_oa purge; +DROP TABLE ${stats_db_name}.software_oa purge; +DROP TABLE ${stats_db_name}.allpubs purge; +DROP TABLE ${stats_db_name}.alldatasets purge; +DROP TABLE ${stats_db_name}.allsoftware purge; +DROP TABLE ${stats_db_name}.allpubsshare purge; +DROP TABLE ${stats_db_name}.alldatasetssshare purge; +DROP TABLE ${stats_db_name}.allsoftwaresshare purge; + +--RIs Openess + +CREATE TEMPORARY TABLE ${stats_db_name}.result_contexts as +select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc +join ${stats_db_name}.concept on concept.id=rc.concept +join ${stats_db_name}.category on category.id=concept.category +join ${stats_db_name}.context on context.id=category.context; + +CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as +select rp.ri_initiative 
ri_initiative, count(distinct rp.id) no_oapubs from ${stats_db_name}.result_contexts rp +join ${stats_db_name}.publication r on r.id=rp.id +join ${stats_db_name}.result_instance ri on ri.id=r.id +where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') +and cast(r.year as int)>2003 +group by rp.ri_initiative; + +CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oadatasets from ${stats_db_name}.result_contexts rp +join ${stats_db_name}.dataset r on r.id=rp.id +join ${stats_db_name}.result_instance ri on ri.id=r.id +where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') +and cast(r.year as int)>2003 +group by rp.ri_initiative; + +CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oasoftware from ${stats_db_name}.result_contexts rp +join ${stats_db_name}.software r on r.id=rp.id +join ${stats_db_name}.result_instance ri on ri.id=r.id +where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') +and cast(r.year as int)>2003 +group by rp.ri_initiative; + +CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allpubs from ${stats_db_name}.result_contexts rp +join ${stats_db_name}.publication r on r.id=rp.id +where cast(r.year as int)>2003 +group by rp.ri_initiative; + +CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_alldatasets from ${stats_db_name}.result_contexts rp +join ${stats_db_name}.dataset r on r.id=rp.id +where cast(r.year as int)>2003 +group by rp.ri_initiative; + +CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allsoftware from ${stats_db_name}.result_contexts rp +join 
${stats_db_name}.software r on r.id=rp.id +where cast(r.year as int)>2003 +group by rp.ri_initiative; + +CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as +select pubs_oa.ri_initiative, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs + join ${stats_db_name}.pubs_oa on allpubs.ri_initiative=pubs_oa.ri_initiative; + +CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +select datasets_oa.ri_initiative, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d + from ${stats_db_name}.alldatasets + join ${stats_db_name}.datasets_oa on alldatasets.ri_initiative=datasets_oa.ri_initiative; + +CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +select software_oa.ri_initiative, software_oa.no_oasoftware/allsoftware.no_allsoftware s + from ${stats_db_name}.allsoftware + join ${stats_db_name}.software_oa on allsoftware.ri_initiative=software_oa.ri_initiative; + +create table if not exists ${stats_db_name}.indi_ris_openess stored as parquet as +select allpubsshare.ri_initiative, + (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) + +(case when d is null then 0 else 1 end)) + ris_openess FROM ${stats_db_name}.allpubsshare + left outer join (select ri_initiative,d from + ${stats_db_name}.alldatasetssshare) tmp1 + on tmp1.ri_initiative=allpubsshare.ri_initiative + left outer join (select ri_initiative,s from + ${stats_db_name}.allsoftwaresshare) tmp2 + on tmp2.ri_initiative=allpubsshare.ri_initiative; + +DROP TABLE ${stats_db_name}.result_contexts purge; +DROP TABLE ${stats_db_name}.pubs_oa purge; +DROP TABLE ${stats_db_name}.datasets_oa purge; +DROP TABLE ${stats_db_name}.software_oa purge; +DROP TABLE ${stats_db_name}.allpubs purge; +DROP TABLE ${stats_db_name}.alldatasets purge; +DROP TABLE ${stats_db_name}.allsoftware purge; +DROP TABLE ${stats_db_name}.allpubsshare purge; +DROP TABLE ${stats_db_name}.alldatasetssshare purge; +DROP TABLE ${stats_db_name}.allsoftwaresshare purge; + +--Funder 
Findability +create table if not exists ${stats_db_name}.indi_funder_findable stored as parquet as +with result_findable as + (select p.funder funder, count(distinct rp.id) no_result_findable from ${stats_db_name}.result_projects rp + join ${stats_db_name}.publication r on r.id=rp.id + join ${stats_db_name}.project p on p.id=rp.project + join ${stats_db_name}.result_pids rpi on rpi.id=r.id + where cast(year as int)>2003 + group by p.funder), + allresults as (select p.funder funder, count(distinct rp.id) no_allresults from ${stats_db_name}.result_projects rp + join ${stats_db_name}.result r on r.id=rp.id + join ${stats_db_name}.project p on p.id=rp.project + where cast(year as int)>2003 + group by p.funder) +select allresults.funder, result_findable.no_result_findable/allresults.no_allresults funder_findable +from allresults + join result_findable on result_findable.funder=allresults.funder; + +--RIs Findability +create table if not exists ${stats_db_name}.indi_ris_findable stored as parquet as +with result_contexts as +(select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc +join ${stats_db_name}.concept on concept.id=rc.concept +join ${stats_db_name}.category on category.id=concept.category +join ${stats_db_name}.context on context.id=category.context), +result_findable as + (select rc.ri_initiative ri_initiative, count(distinct rc.id) no_result_findable from result_contexts rc + join ${stats_db_name}.result r on r.id=rc.id + join ${stats_db_name}.result_pids rp on rp.id=r.id + where cast(r.year as int)>2003 + group by rc.ri_initiative), +allresults as +(select rc.ri_initiative ri_initiative, count(distinct rc.id) no_allresults from result_contexts rc + join ${stats_db_name}.result r on r.id=rc.id + where cast(r.year as int)>2003 + group by rc.ri_initiative) +select allresults.ri_initiative, result_findable.no_result_findable/allresults.no_allresults ris_findable +from allresults + join result_findable on 
result_findable.ri_initiative=allresults.ri_initiative; + diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 3eeb792c7..586bee347 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -88,6 +88,7 @@ create view if not exists TARGET.doctoratestudents as select * from SOURCE.docto create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; +create view if not exists TARGET.graduatedoctorates as select * from SOURCE.graduatedoctorates; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); --ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; @@ -270,3 +271,11 @@ create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * f --ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id); --ANALYZE TABLE TARGET.result_apc_affiliations COMPUTE STATISTICS; +create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); +create table TARGET.indi_is_funder_plan_s stored as parquet as select * from 
SOURCE.indi_is_funder_plan_s orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create view TARGET.indi_funder_fairness as select * from SOURCE.indi_funder_fairness; +create view TARGET.indi_funder_openess as select * from SOURCE.indi_funder_openess; +create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_findable; +create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness; +create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess; +create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql index a59791084..df4795e3e 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDBAll.sql @@ -15,6 +15,7 @@ create view if not exists TARGET.doctoratestudents as select * from SOURCE.docto create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; +create view if not exists TARGET.graduatedoctorates as select * from SOURCE.graduatedoctorates; create table TARGET.result stored as parquet as select distinct * from ( @@ -73,7 +74,11 @@ create table TARGET.result stored as parquet as 'openorgs____::c0b262bd6eab819e4c994914f9c010e2', -- National Institute of Geophysics and Volcanology 'openorgs____::1624ff7c01bb641b91f4518539a0c28a', -- Vrije Universiteit Amsterdam 
'openorgs____::4d4051b56708688235252f1d8fddb8c1', --Iscte - Instituto Universitário de Lisboa - 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab' -- Universidade Católica Portuguesa + 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab', -- Universidade Católica Portuguesa + 'openorgs____::4d4051b56708688235252f1d8fddb8c1', -- Iscte - Instituto Universitário de Lisboa + 'openorgs____::5d55fb216b14691cf68218daf5d78cd9', -- Munster Technological University + 'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University + 'openorgs____::8839b55dae0c84d56fd533f52d5d483a' -- Leibniz Institute of Ecological Urban and Regional Development ) )) foo; --ANALYZE TABLE TARGET.result COMPUTE STATISTICS; @@ -92,6 +97,7 @@ create view if not exists TARGET.doctoratestudents as select * from SOURCE.docto create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers; create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; +--create view if not exists TARGET.graduatedoctorates as select * from SOURCE.graduatedoctorates; create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); --ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; @@ -274,3 +280,13 @@ create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * f --ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id); --ANALYZE TABLE TARGET.result_apc_affiliations COMPUTE STATISTICS; +create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.id); 
+create table TARGET.indi_is_funder_plan_s stored as parquet as select * from SOURCE.indi_is_funder_plan_s orig where exists (select 1 from TARGET.result r where r.id=orig.id); +create view TARGET.indi_funder_fairness as select * from SOURCE.indi_funder_fairness; +create view TARGET.indi_funder_openess as select * from SOURCE.indi_funder_openess; +create view TARGET.indi_funder_findable as select * from SOURCE.indi_funder_findable; +create view TARGET.indi_ris_fairness as select * from SOURCE.indi_ris_fairness; +create view TARGET.indi_ris_openess as select * from SOURCE.indi_ris_openess; +create view TARGET.indi_ris_findable as select * from SOURCE.indi_ris_findable; + + diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql index 1f75c3cd1..7bfba92a8 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql @@ -34,16 +34,16 @@ create table TARGET.result stored as parquet as 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech - 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University - 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona - 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University - 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia - 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- 
Bilkent University - 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje + 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University + 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona + 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University + 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia + 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University + 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan - 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork + 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork 'openorgs____::38d7097854736583dde879d12dacafca', -- Brown University - 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech + 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology @@ -52,10 +52,13 @@ create table TARGET.result stored as parquet as 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University 'openorgs____::3cff625a4370d51e08624cc586138b2f', -- IMT Atlantique 'openorgs____::c0b262bd6eab819e4c994914f9c010e2', -- National Institute of Geophysics and Volcanology - 'openorgs____::1624ff7c01bb641b91f4518539a0c28a', -- Vrije Universiteit Amsterdam + 'openorgs____::1624ff7c01bb641b91f4518539a0c28a', -- Vrije Universiteit Amsterdam 'openorgs____::4d4051b56708688235252f1d8fddb8c1', --Iscte - Instituto Universitário de Lisboa - 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab' -- Universidade Católica 
Portuguesa - + 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab', -- Universidade Católica Portuguesa + 'openorgs____::4d4051b56708688235252f1d8fddb8c1', -- Iscte - Instituto Universitário de Lisboa + 'openorgs____::5d55fb216b14691cf68218daf5d78cd9', -- Munster Technological University + 'openorgs____::0fccc7640f0cb44d5cd1b06b312a06b9', -- Cardiff University + 'openorgs____::8839b55dae0c84d56fd533f52d5d483a' -- Leibniz Institute of Ecological Urban and Regional Development ))) foo; --ANALYZE TABLE TARGET.result COMPUTE STATISTICS; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql old mode 100644 new mode 100755 diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index c03520e48..aa991730b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -467,7 +467,7 @@ ${usage_stats_db_shadow_name} finalizeImpalaCluster.sh - + From 2caaaec42d24023b7195f8922bafd43bc12494dd Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 1 Sep 2023 09:32:57 +0200 Subject: [PATCH 05/57] Include SparkCleanRelation logic in SparkPropagateRelation SparkPropagateRelation includes merge relations Revised tests for SparkPropagateRelation --- .../dhp/oa/dedup/RelationAggregator.java | 57 ------- .../dhp/oa/dedup/SparkCleanRelation.scala | 78 --------- .../dhp/oa/dedup/SparkPropagateRelation.java | 138 ++++++++-------- .../oa/dedup/cleanRelation_parameters.json | 20 --- .../dedup/consistency/oozie_app/workflow.xml | 28 +--- 
.../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 150 ++++++++---------- .../oa/dedup/SparkOpenorgsProvisionTest.java | 103 ++++++------ 7 files changed, 185 insertions(+), 389 deletions(-) delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/RelationAggregator.java delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCleanRelation.scala delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/RelationAggregator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/RelationAggregator.java deleted file mode 100644 index 96d783dbf..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/RelationAggregator.java +++ /dev/null @@ -1,57 +0,0 @@ - -package eu.dnetlib.dhp.oa.dedup; - -import java.util.Objects; - -import org.apache.spark.sql.Encoder; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.expressions.Aggregator; - -import eu.dnetlib.dhp.schema.oaf.Relation; - -public class RelationAggregator extends Aggregator { - - private static final Relation ZERO = new Relation(); - - @Override - public Relation zero() { - return ZERO; - } - - @Override - public Relation reduce(Relation b, Relation a) { - return mergeRel(b, a); - } - - @Override - public Relation merge(Relation b, Relation a) { - return mergeRel(b, a); - } - - @Override - public Relation finish(Relation r) { - return r; - } - - private Relation mergeRel(Relation b, Relation a) { - if (Objects.equals(b, ZERO)) { - return a; - } - if (Objects.equals(a, ZERO)) { - return b; - } - - b.mergeFrom(a); - return b; - } - - @Override - public Encoder bufferEncoder() { - return Encoders.kryo(Relation.class); - } - - @Override - public Encoder outputEncoder() { - return Encoders.kryo(Relation.class); - } -} diff --git 
a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCleanRelation.scala b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCleanRelation.scala deleted file mode 100644 index 5d8da42c2..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCleanRelation.scala +++ /dev/null @@ -1,78 +0,0 @@ -package eu.dnetlib.dhp.oa.dedup - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.common.HdfsSupport -import eu.dnetlib.dhp.schema.oaf.Relation -import eu.dnetlib.dhp.utils.ISLookupClientFactory -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService -import org.apache.commons.io.IOUtils -import org.apache.spark.SparkConf -import org.apache.spark.sql._ -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.types.{DataTypes, StructField, StructType} -import org.slf4j.LoggerFactory - -object SparkCleanRelation { - private val log = LoggerFactory.getLogger(classOf[SparkCleanRelation]) - - @throws[Exception] - def main(args: Array[String]): Unit = { - val parser = new ArgumentApplicationParser( - IOUtils.toString( - classOf[SparkCleanRelation].getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json") - ) - ) - parser.parseArgument(args) - val conf = new SparkConf - - new SparkCleanRelation(parser, AbstractSparkAction.getSparkSession(conf)) - .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))) - } -} - -class SparkCleanRelation(parser: ArgumentApplicationParser, spark: SparkSession) - extends AbstractSparkAction(parser, spark) { - override def run(isLookUpService: ISLookUpService): Unit = { - val graphBasePath = parser.get("graphBasePath") - val inputPath = parser.get("inputPath") - val outputPath = parser.get("outputPath") - - SparkCleanRelation.log.info("graphBasePath: '{}'", graphBasePath) - SparkCleanRelation.log.info("inputPath: '{}'", inputPath) - SparkCleanRelation.log.info("outputPath: '{}'", 
outputPath) - - AbstractSparkAction.removeOutputDir(spark, outputPath) - - val entities = - Seq("datasource", "project", "organization", "publication", "dataset", "software", "otherresearchproduct") - - val idsSchema = StructType.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>") - - val emptyIds = spark.createDataFrame(spark.sparkContext.emptyRDD[Row].setName("empty"), - idsSchema) - - val ids = entities - .foldLeft(emptyIds)((ds, entity) => { - val entityPath = graphBasePath + '/' + entity - if (HdfsSupport.exists(entityPath, spark.sparkContext.hadoopConfiguration)) { - ds.union(spark.read.schema(idsSchema).json(entityPath)) - } else { - ds - } - }) - .filter("dataInfo.deletedbyinference != true AND dataInfo.invisible != true") - .select("id") - .distinct() - - val relations = spark.read.schema(Encoders.bean(classOf[Relation]).schema).json(inputPath) - .filter("dataInfo.deletedbyinference != true AND dataInfo.invisible != true") - - AbstractSparkAction.save( - relations - .join(ids, col("source") === ids("id"), "leftsemi") - .join(ids, col("target") === ids("id"), "leftsemi"), - outputPath, - SaveMode.Overwrite - ) - } -} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index 175ebf8a6..739295c91 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -3,23 +3,19 @@ package eu.dnetlib.dhp.oa.dedup; import static org.apache.spark.sql.functions.col; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.Objects; - -import org.apache.commons.beanutils.BeanUtils; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import 
org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.ReduceFunction; import org.apache.spark.sql.*; +import org.apache.spark.sql.catalyst.encoders.RowEncoder; +import org.apache.spark.sql.types.StructType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.DataInfo; @@ -70,73 +66,63 @@ public class SparkPropagateRelation extends AbstractSparkAction { log.info("workingPath: '{}'", workingPath); log.info("graphOutputPath: '{}'", graphOutputPath); - final String outputRelationPath = DedupUtility.createEntityPath(graphOutputPath, "relation"); - removeOutputDir(spark, outputRelationPath); - Dataset mergeRels = spark - .read() - .load(DedupUtility.createMergeRelPath(workingPath, "*", "*")) - .as(REL_BEAN_ENC); + .read() + .load(DedupUtility.createMergeRelPath(workingPath, "*", "*")) + .as(REL_BEAN_ENC); // - Dataset mergedIds = mergeRels - .where(col("relClass").equalTo(ModelConstants.MERGES)) - .select(col("source").as("dedupID"), col("target").as("mergedObjectID")) - .distinct() - .cache(); + Dataset idsToMerge = mergeRels + .where(col("relClass").equalTo(ModelConstants.MERGES)) + .select(col("source").as("dedupID"), col("target").as("mergedObjectID")) + .distinct(); Dataset allRels = spark - .read() - .schema(REL_BEAN_ENC.schema()) - .json(DedupUtility.createEntityPath(graphBasePath, "relation")); + .read() + .schema(REL_BEAN_ENC.schema()) + .json(graphBasePath + "/relation"); Dataset dedupedRels = allRels - .joinWith(mergedIds, allRels.col("source").equalTo(mergedIds.col("mergedObjectID")), "left_outer") - .joinWith(mergedIds, 
col("_1.target").equalTo(mergedIds.col("mergedObjectID")), "left_outer") - .select("_1._1", "_1._2.dedupID", "_2.dedupID") - .as(Encoders.tuple(REL_BEAN_ENC, Encoders.STRING(), Encoders.STRING())) - .flatMap(SparkPropagateRelation::addInferredRelations, REL_KRYO_ENC); + .joinWith(idsToMerge, allRels.col("source").equalTo(idsToMerge.col("mergedObjectID")), "left_outer") + .joinWith(idsToMerge, col("_1.target").equalTo(idsToMerge.col("mergedObjectID")), "left_outer") + .select("_1._1", "_1._2.dedupID", "_2.dedupID") + .as(Encoders.tuple(REL_BEAN_ENC, Encoders.STRING(), Encoders.STRING())) + .map((MapFunction, Relation>) t -> { + Relation rel = t._1(); + String newSource = t._2(); + String newTarget = t._3(); - Dataset processedRelations = distinctRelations( - dedupedRels.union(mergeRels.map((MapFunction) r -> r, REL_KRYO_ENC))) - .filter((FilterFunction) r -> !Objects.equals(r.getSource(), r.getTarget())); + if (rel.getDataInfo() == null) { + rel.setDataInfo(new DataInfo()); + } - save(processedRelations, outputRelationPath, SaveMode.Overwrite); - } + if (newSource != null || newTarget != null) { + rel.getDataInfo().setDeletedbyinference(false); - private static Iterator addInferredRelations(Tuple3 t) throws Exception { - Relation existingRel = t._1(); - String newSource = t._2(); - String newTarget = t._3(); + if (newSource != null) + rel.setSource(newSource); - if (newSource == null && newTarget == null) { - return Collections.singleton(t._1()).iterator(); - } + if (newTarget != null) + rel.setTarget(newTarget); + } - // update existing relation - if (existingRel.getDataInfo() == null) { - existingRel.setDataInfo(new DataInfo()); - } - existingRel.getDataInfo().setDeletedbyinference(true); + return rel; + }, REL_BEAN_ENC); - // Create new relation inferred by dedupIDs - Relation inferredRel = (Relation) BeanUtils.cloneBean(existingRel); + // ids of records that are both not deletedbyinference and not invisible + Dataset ids = validIds(spark, graphBasePath); - 
inferredRel.setDataInfo((DataInfo) BeanUtils.cloneBean(existingRel.getDataInfo())); - inferredRel.getDataInfo().setDeletedbyinference(false); + // filter relations that point to valid records, can force them to be visible + Dataset cleanedRels = dedupedRels + .join(ids, col("source").equalTo(ids.col("id")), "leftsemi") + .join(ids, col("target").equalTo(ids.col("id")), "leftsemi") + .as(REL_BEAN_ENC) + .map((MapFunction) r -> { + r.getDataInfo().setInvisible(false); + return r; + }, REL_KRYO_ENC); - if (newSource != null) - inferredRel.setSource(newSource); - - if (newTarget != null) - inferredRel.setTarget(newTarget); - - return Arrays.asList(existingRel, inferredRel).iterator(); - } - - private Dataset distinctRelations(Dataset rels) { - return rels - .filter(getRelationFilterFunction()) + Dataset distinctRels = cleanedRels .groupByKey( (MapFunction) r -> String .join(" ", r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()), @@ -146,13 +132,33 @@ public class SparkPropagateRelation extends AbstractSparkAction { return b; }) .map((MapFunction, Relation>) Tuple2::_2, REL_BEAN_ENC); + + final String outputRelationPath = graphOutputPath + "/relation"; + removeOutputDir(spark, outputRelationPath); + save( + distinctRels + .union(mergeRels) + .filter("source != target AND dataInfo.deletedbyinference != true AND dataInfo.invisible != true"), + outputRelationPath, + SaveMode.Overwrite); } - private FilterFunction getRelationFilterFunction() { - return r -> StringUtils.isNotBlank(r.getSource()) || - StringUtils.isNotBlank(r.getTarget()) || - StringUtils.isNotBlank(r.getRelType()) || - StringUtils.isNotBlank(r.getSubRelType()) || - StringUtils.isNotBlank(r.getRelClass()); + static Dataset validIds(SparkSession spark, String graphBasePath) { + StructType idsSchema = StructType + .fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>"); + + Dataset allIds = spark.emptyDataset(RowEncoder.apply(idsSchema)); + + 
for (EntityType entityType : ModelSupport.entityTypes.keySet()) { + String entityPath = graphBasePath + '/' + entityType.name(); + if (HdfsSupport.exists(entityPath, spark.sparkContext().hadoopConfiguration())) { + allIds = allIds.union(spark.read().schema(idsSchema).json(entityPath)); + } + } + + return allIds + .filter("dataInfo.deletedbyinference != true AND dataInfo.invisible != true") + .select("id") + .distinct(); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json deleted file mode 100644 index 860539ad9..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName": "i", - "paramLongName": "graphBasePath", - "paramDescription": "the base path of raw graph", - "paramRequired": true - }, - { - "paramName": "w", - "paramLongName": "inputPath", - "paramDescription": "the path to the input relation to cleanup", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "the path of the output relation cleaned", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml index b724e5d0b..0083339cf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml @@ -100,35 +100,9 @@ --conf spark.sql.shuffle.partitions=15000 --graphBasePath${graphBasePath} - --graphOutputPath${workingPath}/propagaterelation/ + 
--graphOutputPath${graphOutputPath} --workingPath${workingPath} - - - - - - - yarn - cluster - Clean Relations - eu.dnetlib.dhp.oa.dedup.SparkCleanRelation - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --conf spark.executor.memoryOverhead=${sparkExecutorMemoryOverhead} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=15000 - - --graphBasePath${graphBasePath} - --inputPath${workingPath}/propagaterelation/relation - --outputPath${graphOutputPath}/relation - diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 38bd72a5e..6c4935637 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -3,7 +3,6 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; -import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.functions.count; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; @@ -23,14 +22,13 @@ import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; -import 
org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -46,8 +44,6 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; @ExtendWith(MockitoExtension.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class) @@ -62,6 +58,8 @@ public class SparkDedupTest implements Serializable { private static String testGraphBasePath; private static String testOutputBasePath; private static String testDedupGraphBasePath; + private static String testConsistencyGraphBasePath; + private static final String testActionSetId = "test-orchestrator"; private static String whitelistPath; private static List whiteList; @@ -75,6 +73,7 @@ public class SparkDedupTest implements Serializable { .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) .toFile() .getAbsolutePath(); + testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") .toAbsolutePath() .toString(); @@ -83,6 +82,10 @@ public class SparkDedupTest implements Serializable { .toAbsolutePath() .toString(); + testConsistencyGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + whitelistPath = Paths .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) .toFile() @@ -674,22 +677,45 @@ public class SparkDedupTest implements Serializable { assertEquals(mergedOrp, deletedOrp); } + @Test + @Order(6) + void copyRelationsNoOpenorgsTest() throws Exception { + + 
ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCopyRelationsNoOpenorgs.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkCopyRelationsNoOpenorgs(parser, spark).run(isLookUpService); + + final Dataset outputRels = spark.read().text(testDedupGraphBasePath + "/relation"); + + System.out.println(outputRels.count()); + // assertEquals(2382, outputRels.count()); + } + @Test @Order(7) void propagateRelationTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json")); - String outputRelPath = testDedupGraphBasePath + "/propagaterelation"; parser .parseArgument( new String[] { - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", outputRelPath + "-i", testDedupGraphBasePath, "-w", testOutputBasePath, "-o", testConsistencyGraphBasePath }); new SparkPropagateRelation(parser, spark).run(isLookUpService); - long relations = jsc.textFile(outputRelPath + "/relation").count(); + long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); // assertEquals(4860, relations); System.out.println("relations = " + relations); @@ -699,95 +725,52 @@ public class SparkDedupTest implements Serializable { .read() .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = mergeRels - .where("relClass == 'merges'") - .select(mergeRels.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2(r.getString(0), "d")); - JavaRDD toCheck = jsc - .textFile(outputRelPath + "/relation") - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()) - 
.mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()); + Dataset inputRels = spark + .read() + .json(testDedupGraphBasePath + "/relation"); - long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); - long updated = toCheck.count(); + Dataset outputRels = spark + .read() + .json(testConsistencyGraphBasePath + "/relation"); - assertEquals(updated, deletedbyinference); + assertEquals( + 0, outputRels + .filter("dataInfo.deletedbyinference == true OR dataInfo.invisible == true") + .count()); + + assertEquals( + 5, outputRels + .filter("relClass NOT IN ('merges', 'isMergedIn')") + .count()); + + assertEquals(5 + mergeRels.count(), outputRels.count()); } @Test @Order(8) - void testCleanBaseRelations() throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json")); - - // append dangling relations to be cleaned up + void testCleanedPropagatedRelations() throws Exception { Dataset df_before = spark .read() .schema(Encoders.bean(Relation.class).schema()) - .json(testGraphBasePath + "/relation"); - Dataset df_input = df_before - .unionByName(df_before.drop("source").withColumn("source", functions.lit("n/a"))) - .unionByName(df_before.drop("target").withColumn("target", functions.lit("n/a"))); - df_input.write().mode(SaveMode.Overwrite).json(testOutputBasePath + "_tmp"); - - parser - .parseArgument( - new String[] { - "--graphBasePath", testGraphBasePath, - "--inputPath", testGraphBasePath + "/relation", - "--outputPath", testDedupGraphBasePath + "/relation" - }); - - new SparkCleanRelation(parser, spark).run(isLookUpService); + .json(testDedupGraphBasePath + "/relation"); Dataset df_after = spark .read() .schema(Encoders.bean(Relation.class).schema()) - .json(testDedupGraphBasePath + "/relation"); - - assertNotEquals(df_before.count(), df_input.count()); - 
assertNotEquals(df_input.count(), df_after.count()); - assertEquals(5, df_after.count()); - } - - @Test - @Order(9) - void testCleanDedupedRelations() throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/cleanRelation_parameters.json")); - - String inputRelPath = testDedupGraphBasePath + "/propagaterelation/relation"; - - // append dangling relations to be cleaned up - Dataset df_before = spark.read().schema(Encoders.bean(Relation.class).schema()).json(inputRelPath); - - df_before.filter(col("dataInfo.deletedbyinference").notEqual(true)).show(50, false); - - parser - .parseArgument( - new String[] { - "--graphBasePath", testGraphBasePath, - "--inputPath", inputRelPath, - "--outputPath", testDedupGraphBasePath + "/relation" - }); - - new SparkCleanRelation(parser, spark).run(isLookUpService); - - Dataset df_after = spark - .read() - .schema(Encoders.bean(Relation.class).schema()) - .json(testDedupGraphBasePath + "/relation"); + .json(testConsistencyGraphBasePath + "/relation"); assertNotEquals(df_before.count(), df_after.count()); - assertEquals(0, df_after.count()); + + assertEquals( + 0, df_after + .filter("dataInfo.deletedbyinference == true OR dataInfo.invisible == true") + .count()); + + assertEquals( + 5, df_after + .filter("relClass NOT IN ('merges', 'isMergedIn')") + .count()); } @Test @@ -813,6 +796,7 @@ public class SparkDedupTest implements Serializable { public static void finalCleanUp() throws IOException { FileUtils.deleteDirectory(new File(testOutputBasePath)); FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + FileUtils.deleteDirectory(new File(testConsistencyGraphBasePath)); } public boolean isDeletedByInference(String s) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 
a0bf6b37e..73e768cf1 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; +import static org.apache.spark.sql.functions.col; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.lenient; @@ -15,10 +16,6 @@ import java.nio.file.Paths; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; @@ -33,8 +30,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; @ExtendWith(MockitoExtension.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class) @@ -44,11 +39,11 @@ public class SparkOpenorgsProvisionTest implements Serializable { ISLookUpService isLookUpService; private static SparkSession spark; - private static JavaSparkContext jsc; private static String testGraphBasePath; private static String testOutputBasePath; private static String testDedupGraphBasePath; + private static String testConsistencyGraphBasePath; private static final String testActionSetId = "test-orchestrator"; @BeforeAll @@ -64,6 +59,9 @@ public class SparkOpenorgsProvisionTest implements Serializable { testDedupGraphBasePath = 
createTempDirectory(SparkOpenorgsProvisionTest.class.getSimpleName() + "-") .toAbsolutePath() .toString(); + testConsistencyGraphBasePath = createTempDirectory(SparkOpenorgsProvisionTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); FileUtils.deleteDirectory(new File(testOutputBasePath)); FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); @@ -76,8 +74,13 @@ public class SparkOpenorgsProvisionTest implements Serializable { .master("local[*]") .config(conf) .getOrCreate(); + } - jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + @AfterAll + public static void finalCleanUp() throws IOException { + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + FileUtils.deleteDirectory(new File(testConsistencyGraphBasePath)); } @BeforeEach @@ -186,26 +189,21 @@ public class SparkOpenorgsProvisionTest implements Serializable { new SparkUpdateEntity(parser, spark).run(isLookUpService); - long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); + Dataset organizations = spark.read().json(testDedupGraphBasePath + "/organization"); - long mergedOrgs = spark + Dataset mergedOrgs = spark .read() .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); + .select("target") + .distinct(); - assertEquals(80, organizations); + assertEquals(80, organizations.count()); - long deletedOrgs = jsc - .textFile(testDedupGraphBasePath + "/organization") - .filter(this::isDeletedByInference) - .count(); + Dataset deletedOrgs = organizations + .filter("dataInfo.deletedbyinference = TRUE"); - assertEquals(mergedOrgs, deletedOrgs); + assertEquals(mergedOrgs.count(), deletedOrgs.count()); } @Test @@ -226,10 +224,9 @@ public class SparkOpenorgsProvisionTest implements Serializable { new 
SparkCopyRelationsNoOpenorgs(parser, spark).run(isLookUpService); - final JavaRDD rels = jsc.textFile(testDedupGraphBasePath + "/relation"); - - assertEquals(2382, rels.count()); + final Dataset outputRels = spark.read().text(testDedupGraphBasePath + "/relation"); + assertEquals(2382, outputRels.count()); } @Test @@ -244,51 +241,41 @@ public class SparkOpenorgsProvisionTest implements Serializable { parser .parseArgument( new String[] { - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + "-i", testDedupGraphBasePath, "-w", testOutputBasePath, "-o", testConsistencyGraphBasePath }); new SparkPropagateRelation(parser, spark).run(isLookUpService); - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - - assertEquals(4896, relations); - - // check deletedbyinference final Dataset mergeRels = spark .read() .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = mergeRels + + Dataset inputRels = spark + .read() + .json(testDedupGraphBasePath + "/relation"); + + Dataset outputRels = spark + .read() + .json(testConsistencyGraphBasePath + "/relation"); + + final Dataset mergedIds = mergeRels .where("relClass == 'merges'") - .select(mergeRels.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2(r.getString(0), "d")); + .select(col("target").as("id")) + .distinct(); - JavaRDD toCheck = jsc - .textFile(testDedupGraphBasePath + "/relation") - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()) - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()); + Dataset toUpdateRels = inputRels + .as("rel") + .join(mergedIds.as("s"), col("rel.source").equalTo(col("s.id")), "left_outer") + .join(mergedIds.as("t"), col("rel.target").equalTo(col("t.id")), 
"left_outer") + .filter("s.id IS NOT NULL OR t.id IS NOT NULL") + .distinct(); - long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); - long updated = toCheck.count(); + Dataset updatedRels = inputRels + .select("source", "target", "relClass") + .except(outputRels.select("source", "target", "relClass")); - assertEquals(updated, deletedbyinference); + assertEquals(toUpdateRels.count(), updatedRels.count()); + assertEquals(140, outputRels.count()); } - - @AfterAll - public static void finalCleanUp() throws IOException { - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - } - - public boolean isDeletedByInference(String s) { - return s.contains("\"deletedbyinference\":true"); - } - } From 5f90cc11e98d0addbfb22bf8ce0a83e87a269e00 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Wed, 6 Sep 2023 14:14:38 +0300 Subject: [PATCH 06/57] Update step16-createIndicatorsTables.sql Fix indi_pub_bronze_oa --- .../oozie_app/scripts/step16-createIndicatorsTables.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 1c80f6757..dd249d371 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -696,7 +696,11 @@ left outer join (select p.id, 1 as is_bronze_oa from ${stats_db_name}.publication p join ${stats_db_name}.indi_result_has_cc_licence cc on cc.id=p.id join ${stats_db_name}.indi_pub_gold_oa ga on ga.id=p.id -where cc.has_cc_license=0 and ga.is_gold=0) tmp on tmp.id=p.id; 
+join ${stats_db_name}.result_instance ri on ri.id=p.id +join ${stats_db_name}.datasource d on d.id=ri.hostedby +where cc.has_cc_license=0 and ga.is_gold=0 +and (d.type='Journal' or d.type='Journal Aggregator/Publisher') +and ri.accessright='Open Access') tmp on tmp.id=p.id; CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as select p.id project_id, acronym, r.id result_id, r.year, p.end_year From 4786aa0e094fe848a5e7024b68c0d4e7c80ec65f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 7 Sep 2023 11:20:35 +0200 Subject: [PATCH 07/57] added Archive ouverte UNIGE (ETHZ.UNIGENF, opendoar____::1400) to the Datacite hostedBy_map --- .../main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json index 9088d2960..d07cc33cb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json @@ -1,4 +1,9 @@ { + "ETHZ.UNIGENF": { + "openaire_id": "opendoar____::1400", + "datacite_name": "Uni Genf", + "official_name": "Archive ouverte UNIGE" + }, "GESIS.RKI": { "openaire_id": "re3data_____::r3d100010436", "datacite_name": "Forschungsdatenzentrum am Robert Koch Institut", From 8a6892cc638c84fb6b05db27fa9e2ed538899896 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 Sep 2023 14:34:28 +0200 Subject: [PATCH 08/57] [graph dedup] consistency wf should not remove the relations while dispatching the entities --- .../dhp/oa/merge/DispatchEntitiesSparkJob.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index 4d2ccb178..cf0a183d7 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -47,17 +47,14 @@ public class DispatchEntitiesSparkJob { String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - boolean filterInvisible = Boolean.valueOf(parser.get("filterInvisible")); + boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible")); log.info("filterInvisible: {}", filterInvisible); SparkConf conf = new SparkConf(); runWithSparkSession( conf, isSparkSessionManaged, - spark -> { - HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); - dispatchEntities(spark, inputPath, outputPath, filterInvisible); - }); + spark -> dispatchEntities(spark, inputPath, outputPath, filterInvisible)); } private static void dispatchEntities( @@ -72,7 +69,9 @@ public class DispatchEntitiesSparkJob { String entityType = entry.getKey(); Class clazz = entry.getValue(); + final String entityPath = outputPath + "/" + entityType; if (!entityType.equalsIgnoreCase("relation")) { + HdfsSupport.remove(entityPath, spark.sparkContext().hadoopConfiguration()); Dataset entityDF = spark .read() .schema(Encoders.bean(clazz).schema()) @@ -91,7 +90,7 @@ public class DispatchEntitiesSparkJob { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "/" + entityType); + .json(entityPath); } }); } From 395a4af020621633b9e33e05a6ee5ab5f089413b Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 12 Sep 2023 22:31:50 +0300 Subject: [PATCH 09/57] Run CC and RAM sequentieally in dhp-impact-indicators WF --- .../impact_indicators/oozie_app/workflow.xml | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git 
a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml index 0d7d29bfe..e43e7cf14 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml @@ -39,7 +39,8 @@ - ${wf:conf('resume') eq "rankings-start"} + ${wf:conf('resume') eq "cc"} + ${wf:conf('resume') eq "ram"} ${wf:conf('resume') eq "impulse"} ${wf:conf('resume') eq "pagerank"} ${wf:conf('resume') eq "attrank"} @@ -89,18 +90,11 @@ ${nameNode}${wfAppPath}/create_openaire_ranking_graph.py#create_openaire_ranking_graph.py - + - - - - - - - @@ -129,7 +123,7 @@ ${wfAppPath}/bip-ranker/CC.py#CC.py - + @@ -165,14 +159,11 @@ ${wfAppPath}/bip-ranker/TAR.py#TAR.py - + - - - From 9d44418d381b14ab7a6301f1979b2a3af5efbd2f Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Thu, 14 Sep 2023 18:43:25 +0300 Subject: [PATCH 10/57] Add collecting software code repository URLs --- dhp-workflows/dhp-swh/pom.xml | 104 +++++++++ .../swh/CollectSoftwareRepositoryURLs.java | 211 ++++++++++++++++++ .../dhp/swh/models/LastVisitResponse.java | 40 ++++ .../eu/dnetlib/dhp/swh/input_parameters.json | 26 +++ .../eu/dnetlib/dhp/swh/job.properties | 25 +++ .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 101 +++++++++ dhp-workflows/pom.xml | 1 + 7 files changed, 508 insertions(+) create mode 100644 dhp-workflows/dhp-swh/pom.xml create mode 100644 dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java create mode 100644 dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitResponse.java create mode 100644 dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_parameters.json create mode 100644 
dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties create mode 100644 dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-swh/pom.xml b/dhp-workflows/dhp-swh/pom.xml new file mode 100644 index 000000000..501b2aef8 --- /dev/null +++ b/dhp-workflows/dhp-swh/pom.xml @@ -0,0 +1,104 @@ + + + 4.0.0 + + eu.dnetlib.dhp + dhp-workflows + 1.2.5-SNAPSHOT + + dhp-swh + + + + org.apache.spark + spark-core_${scala.binary.version} + + + + org.apache.spark + spark-sql_${scala.binary.version} + + + + eu.dnetlib.dhp + dhp-common + ${project.version} + + + net.sf.saxon + Saxon-HE + + + + + + dom4j + dom4j + + + + xml-apis + xml-apis + + + + jaxen + jaxen + + + + org.apache.hadoop + hadoop-distcp + + + + eu.dnetlib + dnet-actionmanager-api + + + eu.dnetlib + dnet-actionmanager-common + + + eu.dnetlib + dnet-openaireplus-mapping-utils + + + saxonica + saxon + + + saxonica + saxon-dom + + + jgrapht + jgrapht + + + net.sf.ehcache + ehcache + + + org.springframework + spring-test + + + org.apache.* + * + + + apache + * + + + + + + org.apache.httpcomponents + httpclient + 4.5.13 + + + + diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java new file mode 100644 index 000000000..c91f2bb8c --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java @@ -0,0 +1,211 @@ + +package eu.dnetlib.dhp.swh; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.io.IOUtils; +import 
org.apache.http.Header; +import org.apache.http.HttpEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; + +/** + * Creates action sets for Crossref affiliation relations inferred by BIP! + */ +public class CollectSoftwareRepositoryURLs implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(CollectSoftwareRepositoryURLs.class); + // public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference"; +// public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!"; +// public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref"; + private static final String DEFAULT_VISIT_TYPE = "git"; + private static final int CONCURRENT_API_CALLS = 1; + + private static final String SWH_LATEST_VISIT_URL = "https://archive.softwareheritage.org/api/1/origin/%s/visit/latest/"; + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + CollectSoftwareRepositoryURLs.class + .getResourceAsStream("/eu/dnetlib/dhp/swh/input_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + final Boolean 
isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String hiveDbName = parser.get("hiveDbName"); + log.info("hiveDbName {}: ", hiveDbName); + + final String outputPath = parser.get("softwareCodeRepositoryURLs"); + log.info("softwareCodeRepositoryURLs {}: ", outputPath); + + final String hiveMetastoreUris = parser.get("hiveMetastoreUris"); + log.info("hiveMetastoreUris: {}", hiveMetastoreUris); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", hiveMetastoreUris); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> { + doRun(spark, hiveDbName, outputPath); + }); + } + + private static void doRun(SparkSession spark, String hiveDbName, String outputPath) { + + String queryTemplate = "SELECT distinct coderepositoryurl.value " + + "FROM %s.software " + + "WHERE coderepositoryurl.value IS NOT NULL"; + String query = String.format(queryTemplate, hiveDbName); + + log.info("Hive query to fetch software code URLs: {}", query); + + Dataset df = spark.sql(query); + + // write distinct repository URLs + df + .write() + .mode(SaveMode.Overwrite) +// .option("compression", "gzip") + .csv(outputPath); + } + + private static Dataset readSoftware(SparkSession spark, String inputPath) { + return spark + .read() + .json(inputPath) + .select( + new Column("codeRepositoryUrl.value").as("codeRepositoryUrl"), + new Column("dataInfo.deletedbyinference"), + new Column("dataInfo.invisible")); + } + + private static Dataset filterSoftware(Dataset softwareDF, Integer limit) { + + Dataset df = softwareDF + .where(softwareDF.col("codeRepositoryUrl").isNotNull()) + .where("deletedbyinference = false") + .where("invisible = false") + .drop("deletedbyinference") + .drop("invisible"); + +// TODO remove when done + df = df.limit(limit); + + return df; + } + + public static Dataset 
makeParallelRequests(SparkSession spark, Dataset softwareDF) { + // TODO replace with coalesce ? + Dataset df = softwareDF.repartition(CONCURRENT_API_CALLS); + + log.info("Number of partitions: {}", df.rdd().getNumPartitions()); + + ObjectMapper objectMapper = new ObjectMapper(); + + List collectedRows = df + .javaRDD() + // max parallelism should be equal to the number of partitions here + .mapPartitions((FlatMapFunction, Row>) partition -> { + List resultRows = new ArrayList<>(); + while (partition.hasNext()) { + Row row = partition.next(); + String url = String.format(SWH_LATEST_VISIT_URL, row.getString(0)); + +// String snapshotId = null; +// String type = null; +// String date = null; + + String responseBody = makeAPICall(url); + TimeUnit.SECONDS.sleep(1); +// Thread.sleep(500); +// if (responseBody != null) { +// LastVisitResponse visitResponse = objectMapper.readValue(responseBody, LastVisitResponse.class); +// snapshotId = visitResponse.getSnapshot(); +// type = visitResponse.getType(); +// date = visitResponse.getDate(); +// } +// resultRows.add(RowFactory.create(url, snapshotId, type, date)); + + resultRows.add(RowFactory.create(url, responseBody)); + } + return resultRows.iterator(); + + }) + .collect(); + + StructType resultSchema = new StructType() + .add("codeRepositoryUrl", DataTypes.StringType) + .add("response", DataTypes.StringType); + +// .add("snapshotId", DataTypes.StringType) +// .add("type", DataTypes.StringType) +// .add("date", DataTypes.StringType); + + // create a DataFrame from the collected rows + return spark.createDataFrame(collectedRows, resultSchema); + } + + private static String makeAPICall(String url) throws IOException { + System.out.println(java.time.LocalDateTime.now()); + + try (CloseableHttpClient httpClient = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet(url); + httpGet + .setHeader( + "Authorization", + "Bearer 
eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJhMTMxYTQ1My1hM2IyLTQwMTUtODQ2Ny05MzAyZjk3MTFkOGEifQ.eyJpYXQiOjE2OTQ2MzYwMjAsImp0aSI6IjkwZjdkNTNjLTQ5YTktNGFiMy1hY2E0LTcwMTViMjEyZTNjNiIsImlzcyI6Imh0dHBzOi8vYXV0aC5zb2Z0d2FyZWhlcml0YWdlLm9yZy9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwiYXVkIjoiaHR0cHM6Ly9hdXRoLnNvZnR3YXJlaGVyaXRhZ2Uub3JnL2F1dGgvcmVhbG1zL1NvZnR3YXJlSGVyaXRhZ2UiLCJzdWIiOiIzMTY5OWZkNC0xNmE0LTQxOWItYTdhMi00NjI5MDY4ZjI3OWEiLCJ0eXAiOiJPZmZsaW5lIiwiYXpwIjoic3doLXdlYiIsInNlc3Npb25fc3RhdGUiOiIzMjYzMzEwMS00ZDRkLTQwMjItODU2NC1iMzNlMTJiNTE3ZDkiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyBwcm9maWxlIGVtYWlsIn0.XHj1VIZu1dZ4Ej32-oU84mFmaox9cLNjXosNxwZM0Xs"); + try (CloseableHttpResponse response = httpClient.execute(httpGet)) { + int statusCode = response.getStatusLine().getStatusCode(); +// if (statusCode != 200) +// return null; + Header[] headers = response.getHeaders("X-RateLimit-Remaining"); + for (Header header : headers) { + System.out + .println( + "Key : " + header.getName() + + " ,Value : " + header.getValue()); + } + HttpEntity entity = response.getEntity(); + if (entity != null) { + return EntityUtils.toString(entity); + } + } + } + return null; + } +} diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitResponse.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitResponse.java new file mode 100644 index 000000000..435397590 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitResponse.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.dhp.swh.models; + +import com.cloudera.com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class LastVisitResponse { + + private String type; + + private String date; + + @JsonProperty("snapshot") + private String snapshotId; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = 
type; + } + + public String getDate() { + return date; + } + + public void setDate(String date) { + this.date = date; + } + + public String getSnapshot() { + return snapshotId; + } + + public void setSnapshot(String snapshotId) { + this.snapshotId = snapshotId; + } +} diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_parameters.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_parameters.json new file mode 100644 index 000000000..dd5432b93 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "ip", + "paramLongName": "softwareCodeRepositoryURLs", + "paramDescription": "the URL where to store software repository URLs", + "paramRequired": true + }, + { + "paramName": "db", + "paramLongName": "hiveDbName", + "paramDescription": "the target hive database name", + "paramRequired": true + }, + { + "paramName": "hmu", + "paramLongName": "hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties new file mode 100644 index 000000000..a63343aed --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties @@ -0,0 +1,25 @@ +# hive +hiveDbName=openaire_prod_20230914 +hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + +# oozie +oozie.action.sharelib.for.spark=spark2 +oozie.use.system.libpath=true +oozie.wf.application.path=${oozieTopWfApplicationPath} +oozie.wf.application.path=${oozieTopWfApplicationPath} +oozieActionShareLibForSpark2=spark2 + +# spark 
+spark2EventLogDir=/user/spark/spark2ApplicationHistory +spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener +spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener +spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 +sparkSqlWarehouseDir=/user/hive/warehouse + +# misc +wfAppPath=${oozieTopWfApplicationPath} +resourceManager=http://iis-cdh5-test-m2.ocean.icm.edu.pl:8088/cluster + +# custom params +softwareCodeRepositoryURLs=${workingDir}/code_repo_urls.csv +resume=collect-software-repository-urls diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml new file mode 100644 index 000000000..9832e5f26 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -0,0 +1,101 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:conf('startFrom') eq 'collect-software-repository-urls'} + + + + + + + yarn + cluster + Collect software repository URLs + eu.dnetlib.dhp.swh.CollectSoftwareRepositoryURLs + dhp-swh-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + + --softwareCodeRepositoryURLs${softwareCodeRepositoryURLs} + --hiveDbName${hiveDbName} + --hiveMetastoreUris${hiveMetastoreUris} + + + + + + + + + \ No newline at end of file diff 
--git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index d054ba39b..64f5f2d26 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -39,6 +39,7 @@ dhp-broker-events dhp-doiboost dhp-impact-indicators + dhp-swh From 9ef971a1464e5d307c407316cda69eb97d6ecb9a Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Tue, 19 Sep 2023 14:25:42 +0300 Subject: [PATCH 11/57] Update step16-createIndicatorsTables.sql Fix int year for: indi_org_openess_year indi_org_fairness_year indi_org_findable_year --- .../scripts/step16-createIndicatorsTables.sql | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index dd249d371..ae95727a6 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -444,9 +444,9 @@ CREATE TEMPORARY TABLE ${stats_db_name}.allresults as group by ro.organization, year; create table if not exists ${stats_db_name}.indi_org_fairness_year stored as parquet as - select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness + select cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness from ${stats_db_name}.allresults - join ${stats_db_name}.result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; + join ${stats_db_name}.result_fair on result_fair.organization=allresults.organization and cast(result_fair.year as int)=cast(allresults.year as int); DROP table 
${stats_db_name}.result_fair purge; DROP table ${stats_db_name}.allresults purge; @@ -465,9 +465,9 @@ CREATE TEMPORARY TABLE ${stats_db_name}.allresults as group by ro.organization, year; create table if not exists ${stats_db_name}.indi_org_findable_year stored as parquet as -select allresults.year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable +select cast(allresults.year as int) year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable from ${stats_db_name}.allresults - join ${stats_db_name}.result_with_pid on result_with_pid.organization=allresults.organization and result_with_pid.year=allresults.year; + join ${stats_db_name}.result_with_pid on result_with_pid.organization=allresults.organization and cast(result_with_pid.year as int)=cast(allresults.year as int); DROP table ${stats_db_name}.result_with_pid purge; DROP table ${stats_db_name}.allresults purge; @@ -626,16 +626,16 @@ select allsoftware.year, software_oa.organization, software_oa.no_oasoftware/all create table if not exists ${stats_db_name}.indi_org_openess_year stored as parquet as -select cast(allpubsshare.year as int), allpubsshare.organization, +select cast(allpubsshare.year as int) year, allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) org_openess FROM ${stats_db_name}.allpubsshare - left outer join (select year, organization,d from + left outer join (select cast(year as int), organization,d from ${stats_db_name}.alldatasetssshare) tmp1 on tmp1.organization=allpubsshare.organization and tmp1.year=allpubsshare.year - left outer join (select year, organization,s from + left outer join (select cast(year as int), organization,s from ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.organization=allpubsshare.organization and tmp2.year=allpubsshare.year; + on tmp2.organization=allpubsshare.organization 
and cast(tmp2.year as int)=cast(allpubsshare.year as int); DROP TABLE ${stats_db_name}.pubs_oa purge; DROP TABLE ${stats_db_name}.datasets_oa purge; From 76476cdfb63c2c0570f0669e50970d43e9aecb16 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 20 Sep 2023 10:33:14 +0200 Subject: [PATCH 12/57] Added maven repo for dependencies that are not in maven central --- .../eu/dnetlib/pace/util/DiffPatchMatch.java | 17 +++++++++++++++++ pom.xml | 10 ++++++++++ 2 files changed, 27 insertions(+) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java index 84d49bd5c..cfd9acd70 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java @@ -1,6 +1,23 @@ package eu.dnetlib.pace.util; +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ /* * Diff Match and Patch * Copyright 2018 The diff-match-patch Authors. 
diff --git a/pom.xml b/pom.xml index c6b65e27a..9cd82a343 100644 --- a/pom.xml +++ b/pom.xml @@ -112,6 +112,16 @@ https://maven.d4science.org/nexus/content/repositories/dnet-deps default + + maven-restlet + Restlet repository + https://maven.restlet.talend.com + + + conjars + conjars + https://conjars.wensel.net/repo/ + From cc7204a08904a8b23ac8bd30be5f829ff93e7cc0 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 19 Sep 2023 13:38:25 +0200 Subject: [PATCH 13/57] tests for d4science catalog --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 22 +++++ .../dhp/oa/graph/raw/d4science-1-training.xml | 93 +++++++++++++++++++ .../dhp/oa/graph/raw/d4science-2-dataset.xml | 72 ++++++++++++++ .../oa/provision/XmlRecordFactoryTest.java | 52 ++++++++++- .../oa/provision/d4science-1-training.json | 1 + .../dhp/oa/provision/d4science-2-dataset.json | 1 + 6 files changed, 237 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/d4science-1-training.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/d4science-2-dataset.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/d4science-1-training.json create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/d4science-2-dataset.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 894ed33f7..b506d3a62 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1067,6 +1067,28 @@ class MappersTest { System.out.println("***************"); } + @Test + public void testD4ScienceTraining() throws IOException { + final String xml = 
IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-1-training.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + final OtherResearchProduct trainingMaterial = (OtherResearchProduct) list.get(0); + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(trainingMaterial)); + System.out.println("***************"); + } + + @Test + public void testD4ScienceDataset() throws IOException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-2-dataset.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + final Dataset trainingMaterial = (Dataset) list.get(0); + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(trainingMaterial)); + System.out.println("***************"); + } + @Test void testNotWellFormed() throws IOException { final String xml = IOUtils diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/d4science-1-training.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/d4science-1-training.xml new file mode 100644 index 000000000..91f9f9118 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/d4science-1-training.xml @@ -0,0 +1,93 @@ + + + + alessia_____::104c2d4ba8878c16fa824dce5b1bea57 + 12d8f77e-d66f-46f5-8d88-af7db23bc4c9 + 2023-09-08T10:12:35.864+02:00 + alessia_____ + 2023-09-08T11:31:45.692+02:00 + + + + http://data.d4science.org/ctlg/ResourceCatalogue/visual_analytics_for_data_scientists + + + + BRAGHIERI MARCO + + + + Visual Analytics for Data Scientists + + SoBigData++ + + + + + TrainingMaterial + + Participants to this module shall + - Learn the principles and rules underlying the design of visual data + representations and human-computer interactions + - Understand, adapt and apply 
representative visual analytics methods and systems for diverse types + of data and problems + - Analyse and evaluate the structure and properties + of data to select or devise appropriate methods for data exploration + - Combine visualization, interactive techniques, and computational + processing to develop practical data analysis for problem solving + + (This teaching material on Visual Analytics for Data Scientists is part of a MSc module at City University London). + + The author did not intend to violate any copyright on figures or content. In case you are the legal owner of any copyrighted content, please contact info@sobigdata.eu and we will immediately remove it + + + Visual analytics + + + Slides + Other + PDF + PDF + PDF + PDF + PDF + PDF + PDF + PDF + PDF + PDF + ZIP + + + OPEN + 0010 + + + + other-open + corda__h2020::871042 + + + + + https%3A%2F%2Fapi.d4science.org%2Fcatalogue%2Fitems + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/d4science-2-dataset.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/d4science-2-dataset.xml new file mode 100644 index 000000000..48ceb6c13 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/d4science-2-dataset.xml @@ -0,0 +1,72 @@ + + + + alessia_____::028879484548f4e1c630e1c503e35231 + 4fed018e-c2ff-4afa-b7b5-1ca1beebf850 + 2023-09-08T12:14:27.615+02:00 + alessia_____ + 2023-09-08T12:14:51.7+02:00 + + + + http://data.d4science.org/ctlg/ResourceCatalogue/city-to-city_migration + + + + + + Pappalardo, Luca + + 0000-0002-1547-6007 + + + + City-to-city migration + + SoBigData++ + + + 2018-02-15 + + Dataset + + Census data recording the migration of people between metropolitan areas in + the US + + + Human Mobility data + + + + OPEN + 0021 + 2018-02-15 + + + AFL-3.0 + corda__h2020::871042 + + + + + 
https%3A%2F%2Fapi.d4science.org%2Fcatalogue%2Fitems + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 761539780..88bffd0e7 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -24,10 +24,7 @@ import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.*; public class XmlRecordFactoryTest { @@ -196,4 +193,51 @@ public class XmlRecordFactoryTest { assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemeid").getValue()); assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemename").getValue()); } + + @Test + public void testD4ScienceTraining() throws DocumentException, IOException { + final ContextMapper contextMapper = new ContextMapper(); + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); + + final OtherResearchProduct p = OBJECT_MAPPER + .readValue( + IOUtils.toString(getClass().getResourceAsStream("d4science-1-training.json")), + OtherResearchProduct.class); + + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + + assertNotNull(xml); + + final Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + 
System.out.println(doc.asXML()); + + } + + @Test + public void testD4ScienceDataset() throws DocumentException, IOException { + final ContextMapper contextMapper = new ContextMapper(); + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); + + final OtherResearchProduct p = OBJECT_MAPPER + .readValue( + IOUtils.toString(getClass().getResourceAsStream("d4science-2-dataset.json")), + OtherResearchProduct.class); + + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + + assertNotNull(xml); + + final Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + System.out.println(doc.asXML()); + + } + } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/d4science-1-training.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/d4science-1-training.json new file mode 100644 index 000000000..3ce397f10 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/d4science-1-training.json @@ -0,0 +1 @@ 
+{"collectedfrom":[{"key":"10|alessia_____::6332e88a4c7dba6f7743d3a7a0c6ea2c","value":"Alessia","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1694165542374,"id":"50|alessia_____::104c2d4ba8878c16fa824dce5b1bea57","originalId":["12d8f77e-d66f-46f5-8d88-af7db23bc4c9","50|alessia_____::104c2d4ba8878c16fa824dce5b1bea57"],"pid":[],"dateofcollection":"2023-09-08T10:12:35.864+02:00","dateoftransformation":"2023-09-08T11:31:45.692+02:00","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2023-09-08T10:12:35.864+02:00","altered":true,"baseURL":"https%3A%2F%2Fapi.d4science.org%2Fcatalogue%2Fitems","identifier":"","datestamp":"","metadataNamespace":""}},"measures":null,"processingchargeamount":null,"processingchargecurrency":null,"author":[{"fullname":"BRAGHIERI MARCO","name":"","surname":"","rank":1,"pid":[],"affiliation":[]}],"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"","classname":"","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Visual analytics","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Visual Analytics for Data Scientists","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"Participants to this module shall\n - Learn the principles and rules underlying the design of visual data\n representations and human-computer interactions\n - Understand, adapt and apply representative visual analytics methods and systems for diverse types\n of data and problems\n - Analyse and evaluate the structure and properties\n of data to select or devise appropriate methods for data exploration\n - Combine visualization, interactive techniques, and computational\n processing to develop practical data analysis for problem solving\n\n (This teaching material on Visual Analytics for Data Scientists is part of a MSc module at City University London).\n\n The author did not intend to violate any copyright on figures or content. 
In case you are the legal owner of any copyrighted content, please contact info@sobigdata.eu and we will immediately remove it","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":null,"publisher":{"value":"SoBigData++","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[],"format":[{"value":"Slides","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Other","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"PDF","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"ZIP","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":{"classid":"TrainingMaterial","classname":"TrainingMaterial"
,"schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":{"value":"other-open","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0010","classname":"Lecture","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|alessia_____::6332e88a4c7dba6f7743d3a7a0c6ea2c","value":"Alessia","dataInfo":null},"url":["http://data.d4science.org/ctlg/ResourceCatalogue/visual_analytics_for_data_scientists"],"distributionlocation":null,"collectedfrom":{"key":"10|alessia_____::6332e88a4c7dba6f7743d3a7a0c6ea2c","value":"Alessia","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"","classname":"","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null,"fulltext":null}],"eoscifguidelines":[],"contactperson":[],"contactgroup":[],"tool":[]} diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/d4science-2-dataset.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/d4science-2-dataset.json new file mode 100644 index 000000000..ea8465e36 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/d4science-2-dataset.json @@ -0,0 +1 @@ 
+{"collectedfrom":[{"key":"10|alessia_____::6332e88a4c7dba6f7743d3a7a0c6ea2c","value":"Alessia","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1694507584675,"id":"50|alessia_____::028879484548f4e1c630e1c503e35231","originalId":["4fed018e-c2ff-4afa-b7b5-1ca1beebf850","50|alessia_____::028879484548f4e1c630e1c503e35231"],"pid":[],"dateofcollection":"2023-09-08T12:14:27.615+02:00","dateoftransformation":"2023-09-08T12:14:51.7+02:00","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2023-09-08T12:14:27.615+02:00","altered":true,"baseURL":"https%3A%2F%2Fapi.d4science.org%2Fcatalogue%2Fitems","identifier":"","datestamp":"","metadataNamespace":""}},"measures":null,"processingchargeamount":null,"processingchargecurrency":null,"author":[{"fullname":"Pappalardo, Luca","name":"Luca","surname":"Pappalardo","rank":1,"pid":[{"value":"0000-0002-1547-6007","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"","classname":"","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Human Mobility 
data","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"City-to-city migration","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2018-02-15","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"Census data recording the migration of people between metropolitan areas in\n the 
US","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-02-15","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"SoBigData++","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":{"value":"AFL-3.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|alessia_____::6332e88a4c7dba6f7743d3a7a0c6ea2c","value":"Alessia","dataInfo":null},"url":["http://data.d4science.org/ctlg/ResourceCatalogue/city-to-city_migration"],"distributionlocation":null,"collectedfrom":{"key":"10|alessia_____::6332e88a4c7dba6f7743d3a7a0c6ea2c","value":"Alessia","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2018-02-15","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"","classname":"","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null,"fulltext":null}],"eoscifguidelines":[],"storagedate":{"value":"2018-02-15","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} From 0935d7757cfc9c4efce6500e3e8f02792d56c2ad Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 19 Sep 2023 14:47:01 +0200 Subject: [PATCH 14/57] Use v5 of the UNIBI Gold ISSN list in test --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index 48f1e0c06..9bd32968a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -49,7 +49,7 @@ public class DownloadCsvTest { @Test void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { - String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_5.csv"; final String outputFile = workingDir + "/unibi_gold.json"; new DownloadCSV() From ed9c81a0b7a12ac2b337843ef42f63ca62b1f063 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Wed, 27 Sep 2023 19:00:54 +0300 Subject: [PATCH 15/57] Add steps to collect last visit data && archive not found repository URLs --- .../java/eu/dnetlib/dhp/common/Constants.java | 1 + .../common/collection/HttpClientParams.java | 37 +++- .../dhp/common/collection/HttpConnector2.java | 7 + dhp-workflows/dhp-swh/pom.xml | 6 + .../dhp/swh/ArchiveRepositoryURLs.java | 137 +++++++++++++++ .../swh/CollectLastVisitRepositoryData.java | 120 +++++++++++++ .../swh/CollectSoftwareRepositoryURLs.java | 158 ++---------------- ...tVisitResponse.java => LastVisitData.java} | 10 +- .../dnetlib/dhp/swh/utils/SWHConnection.java | 138 +++++++++++++++ .../dnetlib/dhp/swh/utils/SWHConstants.java | 13 ++ .../eu/dnetlib/dhp/swh/utils/SWHUtils.java | 94 +++++++++++ .../swh/input_archive_repository_urls.json | 26 +++ ...ut_collect_last_visit_repository_data.json | 38 +++++ ...put_collect_software_repository_urls.json} | 2 +- .../eu/dnetlib/dhp/swh/job.properties | 22 +-- .../dhp/swh/oozie_app/config-default.xml | 50 ++++++ .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 109 ++++++------ 
.../dhp/swh/ArchiveRepositoryURLsTest.java | 35 ++++ .../eu/dnetlib/dhp/swh/SWHConnectionTest.java | 57 +++++++ .../dhp/swh/lastVisitDataToArchive.csv | 6 + 20 files changed, 848 insertions(+), 218 deletions(-) create mode 100644 dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java create mode 100644 dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java rename dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/{LastVisitResponse.java => LastVisitData.java} (81%) create mode 100644 dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java create mode 100644 dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java create mode 100644 dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java create mode 100644 dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json create mode 100644 dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json rename dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/{input_parameters.json => input_collect_software_repository_urls.json} (96%) create mode 100644 dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLsTest.java create mode 100644 dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java create mode 100644 dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 4f2c6341e..0477d6399 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -51,6 +51,7 @@ public class Constants { public static 
final String RETRY_DELAY = "retryDelay"; public static final String CONNECT_TIMEOUT = "connectTimeOut"; public static final String READ_TIMEOUT = "readTimeOut"; + public static final String REQUEST_METHOD = "requestMethod"; public static final String FROM_DATE_OVERRIDE = "fromDateOverride"; public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride"; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java index 6fcec00dd..55f9ceb8b 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java @@ -1,6 +1,9 @@ package eu.dnetlib.dhp.common.collection; +import java.util.HashMap; +import java.util.Map; + /** * Bundles the http connection parameters driving the client behaviour. */ @@ -13,6 +16,8 @@ public class HttpClientParams { public static int _connectTimeOut = 10; // seconds public static int _readTimeOut = 30; // seconds + public static String _requestMethod = "GET"; + /** * Maximum number of allowed retires before failing */ @@ -38,17 +43,30 @@ public class HttpClientParams { */ private int readTimeOut; + /** + * Custom http headers + */ + private Map headers; + + /** + * Request method (i.e., GET, POST etc) + */ + private String requestMethod; + + public HttpClientParams() { - this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut); + this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut, new HashMap<>(), _requestMethod); } public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut, - int readTimeOut) { + int readTimeOut, Map headers, String requestMethod) { this.maxNumberOfRetry = maxNumberOfRetry; this.requestDelay = requestDelay; this.retryDelay = retryDelay; this.connectTimeOut = connectTimeOut; this.readTimeOut = 
readTimeOut; + this.headers = headers; + this.requestMethod = requestMethod; } public int getMaxNumberOfRetry() { @@ -91,4 +109,19 @@ public class HttpClientParams { this.readTimeOut = readTimeOut; } + public Map getHeaders() { + return headers; + } + + public void setHeaders(Map headers) { + this.headers = headers; + } + + public String getRequestMethod() { + return requestMethod; + } + + public void setRequestMethod(String requestMethod) { + this.requestMethod = requestMethod; + } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index dd46ab1f4..905457bcd 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -107,7 +107,14 @@ public class HttpConnector2 { urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000); urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent); + urlConn.setRequestMethod(getClientParams().getRequestMethod()); + // if provided, add custom headers + if (!getClientParams().getHeaders().isEmpty()) { + for (Map.Entry headerEntry : getClientParams().getHeaders().entrySet()) { + urlConn.addRequestProperty(headerEntry.getKey(), headerEntry.getValue()); + } + } if (log.isDebugEnabled()) { logHeaderFields(urlConn); } diff --git a/dhp-workflows/dhp-swh/pom.xml b/dhp-workflows/dhp-swh/pom.xml index 501b2aef8..80fff4587 100644 --- a/dhp-workflows/dhp-swh/pom.xml +++ b/dhp-workflows/dhp-swh/pom.xml @@ -99,6 +99,12 @@ httpclient 4.5.13 + + org.datanucleus + datanucleus-core + 3.2.10 + compile + diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java new file mode 100644 index 000000000..7b3b74d9e --- 
/dev/null +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java @@ -0,0 +1,137 @@ + +package eu.dnetlib.dhp.swh; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.swh.models.LastVisitData; +import eu.dnetlib.dhp.swh.utils.SWHConnection; +import eu.dnetlib.dhp.swh.utils.SWHConstants; +import eu.dnetlib.dhp.swh.utils.SWHUtils; +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URL; +import java.util.Date; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import static eu.dnetlib.dhp.common.Constants.REQUEST_METHOD; +import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; + +/** + * Sends archive requests to the SWH API for those software repository URLs that are missing from them + * + * @author Serafeim Chatzopoulos + */ +public class ArchiveRepositoryURLs { + + private static final Logger log = LoggerFactory.getLogger(ArchiveRepositoryURLs.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SWHConnection swhConnection = null; + + public static void main(final String[] args) throws IOException, ParseException { + final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser( + IOUtils + .toString( + CollectLastVisitRepositoryData.class + .getResourceAsStream( + "/eu/dnetlib/dhp/swh/input_archive_repository_urls.json"))); + argumentParser.parseArgument(args); + + final String hdfsuri = argumentParser.get("namenode"); + log.info("hdfsURI: {}", hdfsuri); + + final String 
inputPath = argumentParser.get("lastVisitsPath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = argumentParser.get("archiveRequestsPath"); + log.info("outputPath: {}", outputPath); + + final Integer archiveThresholdInDays = Integer.parseInt(argumentParser.get("archiveThresholdInDays")); + log.info("archiveThresholdInDays: {}", archiveThresholdInDays); + + final HttpClientParams clientParams = SWHUtils.getClientParams(argumentParser); + + swhConnection = new SWHConnection(clientParams); + + final FileSystem fs = FileSystem.get(getHadoopConfiguration(hdfsuri)); + + archive(fs, inputPath, outputPath, archiveThresholdInDays); + + } + + private static void archive(FileSystem fs, String inputPath, String outputPath, Integer archiveThresholdInDays) throws IOException { + + SequenceFile.Reader fr = SWHUtils.getSequenceFileReader(fs, inputPath); + SequenceFile.Writer fw = SWHUtils.getSequenceFileWriter(fs, outputPath); + + // Create key and value objects to hold data + Text repoUrl = new Text(); + Text lastVisitData = new Text(); + + // Read key-value pairs from the SequenceFile and handle appropriately + while (fr.next(repoUrl, lastVisitData)) { + + String response = handleRecord(repoUrl.toString(), lastVisitData.toString(), archiveThresholdInDays); + + // response is equal to null when no need for request + if (response != null) { + SWHUtils.appendToSequenceFile(fw, repoUrl.toString(), response); + } + + } + + // Close readers + fw.close(); + fr.close(); + } + + public static String handleRecord(String repoUrl, String lastVisitData, Integer archiveThresholdInDays) throws IOException { + System.out.println("Key: " + repoUrl + ", Value: " + lastVisitData); + + LastVisitData lastVisit = OBJECT_MAPPER.readValue(lastVisitData, LastVisitData.class); + + // perform an archive request when no repoUrl was not found in previous step + if (lastVisit.getSnapshot() != null) { + + // OR last visit was before (now() - archiveThresholdInDays) + long diffInMillies 
= Math.abs((new Date()).getTime() - lastVisit.getDate().getTime()); + long diffInDays = TimeUnit.DAYS.convert(diffInMillies, TimeUnit.MILLISECONDS); + + if (archiveThresholdInDays >= diffInDays) { + return null; + } + } + + // if last visit data are available, re-use version control type, else use the default one (i.e., git) + String visitType = Optional + .ofNullable(lastVisit.getType()) + .orElse(SWHConstants.DEFAULT_VISIT_TYPE); + + URL url = new URL(String.format(SWHConstants.SWH_ARCHIVE_URL, visitType, repoUrl.trim())); + System.out.println(url.toString()); + + String response; + try { + response = swhConnection.call(url.toString()); + } catch (CollectorException e) { + log.info("Error in request: {}", url); + response = "{}"; + } + + return response; + + } + + + +} diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java new file mode 100644 index 000000000..c4b6412b5 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java @@ -0,0 +1,120 @@ + +package eu.dnetlib.dhp.swh; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.swh.utils.SWHConnection; +import eu.dnetlib.dhp.swh.utils.SWHConstants; +import eu.dnetlib.dhp.swh.utils.SWHUtils; +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; 
+import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.StandardCharsets; + +import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; + +/** + * Given a file with software repository URLs, this class + * collects last visit data from the Software Heritage API. + * + * @author Serafeim Chatzopoulos + */ +public class CollectLastVisitRepositoryData { + + private static final Logger log = LoggerFactory.getLogger(CollectLastVisitRepositoryData.class); + private static SWHConnection swhConnection = null; + + public static void main(final String[] args) + throws IOException, ParseException, InterruptedException, URISyntaxException, CollectorException { + final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser( + IOUtils + .toString( + CollectLastVisitRepositoryData.class + .getResourceAsStream( + "/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json"))); + argumentParser.parseArgument(args); + + log.info("Java Xmx: {}m", Runtime.getRuntime().maxMemory() / (1024 * 1024)); + + final String hdfsuri = argumentParser.get("namenode"); + log.info("hdfsURI: {}", hdfsuri); + + final String inputPath = argumentParser.get("softwareCodeRepositoryURLs"); + log.info("inputPath: {}", inputPath); + + final String outputPath = argumentParser.get("lastVisitsPath"); + log.info("outputPath: {}", outputPath); + + final HttpClientParams clientParams = SWHUtils.getClientParams(argumentParser); + + swhConnection = new SWHConnection(clientParams); + + final FileSystem fs = FileSystem.get(getHadoopConfiguration(hdfsuri)); + + collect(fs, inputPath, outputPath); + + fs.close(); + } + + private static void collect(FileSystem fs, String inputPath, String outputPath) + throws IOException { + + SequenceFile.Writer fw = SWHUtils.getSequenceFileWriter(fs, outputPath); + + // Specify the HDFS directory path you want to read + Path directoryPath = new Path(inputPath); + + // List all files in the directory + FileStatus[] 
partStatuses = fs.listStatus(directoryPath); + + for (FileStatus partStatus : partStatuses) { + + // Check if it's a file (not a directory) + if (partStatus.isFile()) { + handleFile(fs, partStatus.getPath(), fw); + } + + } + + fw.close(); + } + + private static void handleFile(FileSystem fs, Path partInputPath, SequenceFile.Writer fw) + throws IOException { + + BufferedReader br = SWHUtils.getFileReader(fs, partInputPath); + + String repoUrl; + while ((repoUrl = br.readLine()) != null) { + + URL url = new URL(String.format(SWHConstants.SWH_LATEST_VISIT_URL, repoUrl.trim())); + + String response; + try { + response = swhConnection.call(url.toString()); + } catch (CollectorException e) { + log.info("Error in request: {}", url); + response = "{}"; + } + + SWHUtils.appendToSequenceFile(fw, repoUrl, response); + } + + br.close(); + } + +} diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java index c91f2bb8c..f93280b5e 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java @@ -1,60 +1,37 @@ package eu.dnetlib.dhp.swh; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.TimeUnit; - +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Result; import org.apache.commons.io.IOUtils; -import org.apache.http.Header; -import org.apache.http.HttpEntity; -import org.apache.http.client.methods.CloseableHttpResponse; -import 
org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.util.EntityUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.Serializable; +import java.util.Optional; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; /** - * Creates action sets for Crossref affiliation relations inferred by BIP! 
+ * Collects unique software repository URLs in the Graph using Hive + * + * @author Serafeim Chatzopoulos */ public class CollectSoftwareRepositoryURLs implements Serializable { private static final Logger log = LoggerFactory.getLogger(CollectSoftwareRepositoryURLs.class); - // public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference"; -// public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!"; -// public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref"; - private static final String DEFAULT_VISIT_TYPE = "git"; - private static final int CONCURRENT_API_CALLS = 1; - - private static final String SWH_LATEST_VISIT_URL = "https://archive.softwareheritage.org/api/1/origin/%s/visit/latest/"; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( CollectSoftwareRepositoryURLs.class - .getResourceAsStream("/eu/dnetlib/dhp/swh/input_parameters.json")); + .getResourceAsStream("/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -89,7 +66,10 @@ public class CollectSoftwareRepositoryURLs implements Serializable { String queryTemplate = "SELECT distinct coderepositoryurl.value " + "FROM %s.software " + - "WHERE coderepositoryurl.value IS NOT NULL"; + "WHERE coderepositoryurl.value IS NOT NULL " + + "AND datainfo.deletedbyinference = FALSE " + + "AND datainfo.invisible = FALSE " + + "LIMIT 1000"; // TODO remove String query = String.format(queryTemplate, hiveDbName); log.info("Hive query to fetch software code URLs: {}", query); @@ -100,112 +80,6 @@ public class CollectSoftwareRepositoryURLs implements Serializable { df .write() .mode(SaveMode.Overwrite) -// .option("compression", "gzip") .csv(outputPath); } - - private static Dataset readSoftware(SparkSession spark, String inputPath) { - return 
spark - .read() - .json(inputPath) - .select( - new Column("codeRepositoryUrl.value").as("codeRepositoryUrl"), - new Column("dataInfo.deletedbyinference"), - new Column("dataInfo.invisible")); - } - - private static Dataset filterSoftware(Dataset softwareDF, Integer limit) { - - Dataset df = softwareDF - .where(softwareDF.col("codeRepositoryUrl").isNotNull()) - .where("deletedbyinference = false") - .where("invisible = false") - .drop("deletedbyinference") - .drop("invisible"); - -// TODO remove when done - df = df.limit(limit); - - return df; - } - - public static Dataset makeParallelRequests(SparkSession spark, Dataset softwareDF) { - // TODO replace with coalesce ? - Dataset df = softwareDF.repartition(CONCURRENT_API_CALLS); - - log.info("Number of partitions: {}", df.rdd().getNumPartitions()); - - ObjectMapper objectMapper = new ObjectMapper(); - - List collectedRows = df - .javaRDD() - // max parallelism should be equal to the number of partitions here - .mapPartitions((FlatMapFunction, Row>) partition -> { - List resultRows = new ArrayList<>(); - while (partition.hasNext()) { - Row row = partition.next(); - String url = String.format(SWH_LATEST_VISIT_URL, row.getString(0)); - -// String snapshotId = null; -// String type = null; -// String date = null; - - String responseBody = makeAPICall(url); - TimeUnit.SECONDS.sleep(1); -// Thread.sleep(500); -// if (responseBody != null) { -// LastVisitResponse visitResponse = objectMapper.readValue(responseBody, LastVisitResponse.class); -// snapshotId = visitResponse.getSnapshot(); -// type = visitResponse.getType(); -// date = visitResponse.getDate(); -// } -// resultRows.add(RowFactory.create(url, snapshotId, type, date)); - - resultRows.add(RowFactory.create(url, responseBody)); - } - return resultRows.iterator(); - - }) - .collect(); - - StructType resultSchema = new StructType() - .add("codeRepositoryUrl", DataTypes.StringType) - .add("response", DataTypes.StringType); - -// .add("snapshotId", 
DataTypes.StringType) -// .add("type", DataTypes.StringType) -// .add("date", DataTypes.StringType); - - // create a DataFrame from the collected rows - return spark.createDataFrame(collectedRows, resultSchema); - } - - private static String makeAPICall(String url) throws IOException { - System.out.println(java.time.LocalDateTime.now()); - - try (CloseableHttpClient httpClient = HttpClients.createDefault()) { - HttpGet httpGet = new HttpGet(url); - httpGet - .setHeader( - "Authorization", - "Bearer eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJhMTMxYTQ1My1hM2IyLTQwMTUtODQ2Ny05MzAyZjk3MTFkOGEifQ.eyJpYXQiOjE2OTQ2MzYwMjAsImp0aSI6IjkwZjdkNTNjLTQ5YTktNGFiMy1hY2E0LTcwMTViMjEyZTNjNiIsImlzcyI6Imh0dHBzOi8vYXV0aC5zb2Z0d2FyZWhlcml0YWdlLm9yZy9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwiYXVkIjoiaHR0cHM6Ly9hdXRoLnNvZnR3YXJlaGVyaXRhZ2Uub3JnL2F1dGgvcmVhbG1zL1NvZnR3YXJlSGVyaXRhZ2UiLCJzdWIiOiIzMTY5OWZkNC0xNmE0LTQxOWItYTdhMi00NjI5MDY4ZjI3OWEiLCJ0eXAiOiJPZmZsaW5lIiwiYXpwIjoic3doLXdlYiIsInNlc3Npb25fc3RhdGUiOiIzMjYzMzEwMS00ZDRkLTQwMjItODU2NC1iMzNlMTJiNTE3ZDkiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyBwcm9maWxlIGVtYWlsIn0.XHj1VIZu1dZ4Ej32-oU84mFmaox9cLNjXosNxwZM0Xs"); - try (CloseableHttpResponse response = httpClient.execute(httpGet)) { - int statusCode = response.getStatusLine().getStatusCode(); -// if (statusCode != 200) -// return null; - Header[] headers = response.getHeaders("X-RateLimit-Remaining"); - for (Header header : headers) { - System.out - .println( - "Key : " + header.getName() - + " ,Value : " + header.getValue()); - } - HttpEntity entity = response.getEntity(); - if (entity != null) { - return EntityUtils.toString(entity); - } - } - } - return null; - } } diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitResponse.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java similarity index 81% rename from dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitResponse.java rename to 
dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java index 435397590..b8cd6de6e 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitResponse.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java @@ -4,12 +4,14 @@ package eu.dnetlib.dhp.swh.models; import com.cloudera.com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.Date; + @JsonIgnoreProperties(ignoreUnknown = true) -public class LastVisitResponse { +public class LastVisitData { private String type; - private String date; + private Date date; @JsonProperty("snapshot") private String snapshotId; @@ -22,11 +24,11 @@ public class LastVisitResponse { this.type = type; } - public String getDate() { + public Date getDate() { return date; } - public void setDate(String date) { + public void setDate(Date date) { this.date = date; } diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java new file mode 100644 index 000000000..46d512dcb --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java @@ -0,0 +1,138 @@ + +package eu.dnetlib.dhp.swh.utils; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.http.Header; +import org.apache.http.HttpHeaders; +import org.apache.http.HttpStatus; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.common.Constants; +import eu.dnetlib.dhp.common.collection.CollectorException; 
+import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class SWHConnection { + + private static final Logger log = LoggerFactory.getLogger(SWHConnection.class); + + CloseableHttpClient httpClient; + + HttpClientParams clientParams; + + HttpConnector2 conn; + + public SWHConnection(HttpClientParams clientParams) { + +// // force http client to NOT transform double quotes (//) to single quote (/) +// RequestConfig requestConfig = RequestConfig.custom().setNormalizeUri(false).build(); +// +// // Create an HttpClient instance +// httpClient = HttpClientBuilder +// .create() +// .setDefaultRequestConfig(requestConfig) +// .build(); +// +// this.clientParams = clientParams; + // set custom headers + Map headers = new HashMap() { + { + put(HttpHeaders.ACCEPT, "application/json"); + put(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", SWHConstants.ACCESS_TOKEN)); + } + }; + + clientParams.setHeaders(headers); + + // create http connector + conn = new HttpConnector2(clientParams); + + } + + public String call(String url) throws CollectorException { + return conn.getInputSource(url); + } + + public String getLib(String url) throws IOException, CollectorException { + + // delay between requests + if (this.clientParams.getRequestDelay() > 0) { + log.info("Request delay: {}", this.clientParams.getRequestDelay()); + this.backOff(this.clientParams.getRequestDelay()); + } + + // Create an HttpGet request with the URL + HttpGet httpGet = new HttpGet(url); + httpGet.setHeader("Accept", "application/json"); + httpGet.setHeader("Authorization", String.format("Bearer %s", SWHConstants.ACCESS_TOKEN)); + + // Execute the request and get the response + try (CloseableHttpResponse response = httpClient.execute(httpGet)) { + + System.out.println(url); + + int responseCode = response.getStatusLine().getStatusCode(); + if (responseCode != HttpStatus.SC_OK) { + + } + + System.out.println(responseCode); + + List
httpHeaders = Arrays.asList(response.getAllHeaders()); + for (Header header : httpHeaders) { + System.out.println(header.getName() + ":\t" + header.getValue()); + } + + String rateRemaining = this.getRateRemaining(response); + + // back off when rate remaining limit is approaching + if (rateRemaining != null && (Integer.parseInt(rateRemaining) < 2)) { + int retryAfter = this.getRetryAfter(response); + + log.info("Rate Limit: {} - Backing off: {}", rateRemaining, retryAfter); + this.backOff(retryAfter); + } + + return EntityUtils.toString(response.getEntity()); + } + } + + private String getRateRemaining(CloseableHttpResponse response) { + Header header = response.getFirstHeader(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING); + if (header != null) { + return header.getValue(); + } + return null; + } + + private int getRetryAfter(CloseableHttpResponse response) { + Header header = response.getFirstHeader(HttpHeaders.RETRY_AFTER); + if (header != null) { + String retryAfter = header.getValue(); + if (NumberUtils.isCreatable(retryAfter)) { + return Integer.parseInt(retryAfter) + 10; + } + } + return 1000; + } + + private void backOff(int sleepTimeMs) throws CollectorException { + try { + Thread.sleep(sleepTimeMs); + } catch (InterruptedException e) { + throw new CollectorException(e); + } + } + +} diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java new file mode 100644 index 000000000..1299bc805 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.swh.utils; + +public class SWHConstants { + public static final String SWH_LATEST_VISIT_URL = "https://archive.softwareheritage.org/api/1/origin/%s/visit/latest/"; + + public static final String SWH_ARCHIVE_URL = "https://archive.softwareheritage.org/api/1/origin/save/%s/url/%s/"; + + public static final String 
ACCESS_TOKEN = ""; + + public static final String DEFAULT_VISIT_TYPE = "git"; + +} diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java new file mode 100644 index 000000000..8200e7b34 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java @@ -0,0 +1,94 @@ + +package eu.dnetlib.dhp.swh.utils; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.Constants.*; + +public class SWHUtils { + + private static final Logger log = LoggerFactory.getLogger(SWHUtils.class); + + public static HttpClientParams getClientParams(ArgumentApplicationParser argumentParser) { + + final HttpClientParams clientParams = new HttpClientParams(); + clientParams + .setMaxNumberOfRetry( + Optional + .ofNullable(argumentParser.get(MAX_NUMBER_OF_RETRY)) + .map(Integer::parseInt) + .orElse(HttpClientParams._maxNumberOfRetry)); + log.info("maxNumberOfRetry is {}", clientParams.getMaxNumberOfRetry()); + + clientParams + .setRequestDelay( + Optional + .ofNullable(argumentParser.get(REQUEST_DELAY)) + .map(Integer::parseInt) + .orElse(HttpClientParams._requestDelay)); + log.info("requestDelay is {}", clientParams.getRequestDelay()); + + clientParams + .setRetryDelay( + Optional + .ofNullable(argumentParser.get(RETRY_DELAY)) + .map(Integer::parseInt) + .orElse(HttpClientParams._retryDelay)); + log.info("retryDelay is {}", 
clientParams.getRetryDelay()); + + clientParams + .setRequestMethod( + Optional + .ofNullable(argumentParser.get(REQUEST_METHOD)) + .orElse(HttpClientParams._requestMethod)); + log.info("requestMethod is {}", clientParams.getRequestMethod()); + + return clientParams; + } + + public static BufferedReader getFileReader(FileSystem fs, Path inputPath) throws IOException { + FSDataInputStream inputStream = fs.open(inputPath); + return new BufferedReader( + new InputStreamReader(inputStream, StandardCharsets.UTF_8)); + } + + public static SequenceFile.Writer getSequenceFileWriter(FileSystem fs, String outputPath) throws IOException { + return SequenceFile + .createWriter( + fs.getConf(), + SequenceFile.Writer.file(new Path(outputPath)), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class)); + } + + public static SequenceFile.Reader getSequenceFileReader(FileSystem fs, String inputPath) throws IOException { + Path filePath = new Path(inputPath); + SequenceFile.Reader.Option fileOption = SequenceFile.Reader.file(filePath); + + return new SequenceFile.Reader(fs.getConf(), fileOption); + } + + public static void appendToSequenceFile(SequenceFile.Writer fw, String keyStr, String valueStr) throws IOException { + Text key = new Text(); + key.set(keyStr); + + Text value = new Text(); + value.set(valueStr); + + fw.append(key, value); + } +} diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json new file mode 100644 index 000000000..5ec481305 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "n", + "paramLongName": "namenode", + "paramDescription": "the Name Node URI", + "paramRequired": true + }, + { + "paramName": "lv", + "paramLongName": "lastVisitsPath", + "paramDescription": "the URL where 
to store last visits data", + "paramRequired": true + }, + { + "paramName": "rqd", + "paramLongName": "requestDelay", + "paramDescription": "the delay (ms) between requests", + "paramRequired": false + }, + { + "paramName": "atid", + "paramLongName": "archiveThresholdInDays", + "paramDescription": "the thershold (in days) required to issue an archive request", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json new file mode 100644 index 000000000..6c59123be --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json @@ -0,0 +1,38 @@ +[ + { + "paramName": "n", + "paramLongName": "namenode", + "paramDescription": "the Name Node URI", + "paramRequired": true + }, + { + "paramName": "scr", + "paramLongName": "softwareCodeRepositoryURLs", + "paramDescription": "the URL from where to read software repository URLs", + "paramRequired": true + }, + { + "paramName": "lv", + "paramLongName": "lastVisitsPath", + "paramDescription": "the URL where to store last visits data", + "paramRequired": true + }, + { + "paramName": "mnr", + "paramLongName": "maxNumberOfRetry", + "paramDescription": "the maximum number of admitted connection retries", + "paramRequired": false + }, + { + "paramName": "rqd", + "paramLongName": "requestDelay", + "paramDescription": "the delay (ms) between requests", + "paramRequired": false + }, + { + "paramName": "rtd", + "paramLongName": "retryDelay", + "paramDescription": "the delay (ms) between retries", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_parameters.json 
b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json similarity index 96% rename from dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_parameters.json rename to dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json index dd5432b93..6e98c7673 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_parameters.json +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json @@ -6,7 +6,7 @@ "paramRequired": false }, { - "paramName": "ip", + "paramName": "scr", "paramLongName": "softwareCodeRepositoryURLs", "paramDescription": "the URL where to store software repository URLs", "paramRequired": true diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties index a63343aed..e2c2af852 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties @@ -1,25 +1,11 @@ # hive hiveDbName=openaire_prod_20230914 -hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 -# oozie -oozie.action.sharelib.for.spark=spark2 -oozie.use.system.libpath=true -oozie.wf.application.path=${oozieTopWfApplicationPath} -oozie.wf.application.path=${oozieTopWfApplicationPath} -oozieActionShareLibForSpark2=spark2 - -# spark -spark2EventLogDir=/user/spark/spark2ApplicationHistory -spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener -spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener -spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 sparkSqlWarehouseDir=/user/hive/warehouse -# misc -wfAppPath=${oozieTopWfApplicationPath} -resourceManager=http://iis-cdh5-test-m2.ocean.icm.edu.pl:8088/cluster +# input/output files 
+softwareCodeRepositoryURLs=${workingDir}/1_code_repo_urls.csv +lastVisitsPath=${workingDir}/2_last_visits.seq +archiveRequestsPath=${workingDir}/3_archive_requests.seq -# custom params -softwareCodeRepositoryURLs=${workingDir}/code_repo_urls.csv resume=collect-software-repository-urls diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/config-default.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/config-default.xml new file mode 100644 index 000000000..7873d595e --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/config-default.xml @@ -0,0 +1,50 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + + oozieActionShareLibForSpark2 + spark2 + + + resourceManager + http://iis-cdh5-test-m2.ocean.icm.edu.pl:8088/cluster + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index 9832e5f26..5062d562b 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -1,59 +1,31 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + hiveDbName + The name of the Hive DB to be used + + + 
softwareCodeRepositoryURLs + The path in the HDSF to save the software repository URLs + + + resume + Variable that indicates the step to start from + + + ${jobTracker} ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + @@ -90,8 +62,43 @@ --softwareCodeRepositoryURLs${softwareCodeRepositoryURLs} --hiveDbName${hiveDbName} --hiveMetastoreUris${hiveMetastoreUris} - + + + + + + + eu.dnetlib.dhp.swh.CollectLastVisitRepositoryData + + --namenode${nameNode} + --softwareCodeRepositoryURLs${softwareCodeRepositoryURLs} + --lastVisitsPath${lastVisitsPath} + + --maxNumberOfRetry2 + --requestDelay0 + --retryDelay1 + --requestMethodGET + + + + + + + + + eu.dnetlib.dhp.swh.ArchiveRepositoryURLs + + --namenode${nameNode} + --lastVisitsPath${lastVisitsPath} + --archiveThresholdInDays365 + + --maxNumberOfRetry2 + --requestDelay0 + --retryDelay1 + --requestMethodPOST + + diff --git a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLsTest.java b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLsTest.java new file mode 100644 index 000000000..06e40ae14 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLsTest.java @@ -0,0 +1,35 @@ +package eu.dnetlib.dhp.swh; + +import eu.dnetlib.dhp.swh.utils.SWHUtils; +import org.apache.hadoop.fs.FileSystem; +import org.junit.jupiter.api.Test; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Arrays; + +public class ArchiveRepositoryURLsTest { + + @Test + void testArchive() throws IOException { + String inputPath = getClass() + .getResource("/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv") + .getPath(); + + File file = new File(inputPath); + FileReader fr = new FileReader(file); + BufferedReader br = new BufferedReader(fr); //creates a buffering character input stream + + String line; + while((line = br.readLine()) != null) { + String[] 
tokens = line.split("\t"); + + String response = ArchiveRepositoryURLs.handleRecord(tokens[0], tokens[1], 365); + System.out.println(tokens[0] + "\t" + response); + System.out.println(); + } + fr.close(); + } +} diff --git a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java new file mode 100644 index 000000000..d69f6ff1b --- /dev/null +++ b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java @@ -0,0 +1,57 @@ + +package eu.dnetlib.dhp.swh; + +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.swh.utils.SWHConnection; +import eu.dnetlib.dhp.swh.utils.SWHConstants; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; + +//import org.apache.hadoop.hdfs.MiniDFSCluster; + +public class SWHConnectionTest { + private static final Logger log = LoggerFactory.getLogger(SWHConnectionTest.class); + + @Test + void testGetCall() throws IOException { + + HttpClientParams clientParams = new HttpClientParams(); + clientParams.setRequestMethod("GET"); + + SWHConnection swhConnection = new SWHConnection(clientParams); + + String repoUrl = "https://github.com/stanford-futuredata/FAST"; + URL url = new URL(String.format(SWHConstants.SWH_LATEST_VISIT_URL, repoUrl)); + String response = null; + try { + response = swhConnection.call(url.toString()); + } catch (CollectorException e) { + System.out.println("Error in request: " + url); + } + System.out.println(response); + } + + @Test + void testPostCall() throws MalformedURLException { + HttpClientParams clientParams = new HttpClientParams(); + clientParams.setRequestMethod("POST"); + + SWHConnection swhConnection = new SWHConnection(clientParams); + + String repoUrl = 
"https://github.com/stanford-futuredata/FAST"; + URL url = new URL(String.format(SWHConstants.SWH_ARCHIVE_URL, SWHConstants.DEFAULT_VISIT_TYPE, repoUrl)); + String response = null; + try { + response = swhConnection.call(url.toString()); + } catch (CollectorException e) { + System.out.println("Error in request: " + url); + } + System.out.println(response); + } +} diff --git a/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv b/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv new file mode 100644 index 000000000..6477dd62a --- /dev/null +++ b/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv @@ -0,0 +1,6 @@ +https://github.com/bioinsilico/BIPSPI {"origin":"https://github.com/bioinsilico/BIPSPI","visit":1,"date":"2020-03-18T14:50:21.541822+00:00","status":"full","snapshot":"c6c69d2cd73ce89811448da5f031611df6f63bdb","type":"git","metadata":{},"origin_url":"https://archive.softwareheritage.org/api/1/origin/https://github.com/bioinsilico/BIPSPI/get/","snapshot_url":"https://archive.softwareheritage.org/api/1/snapshot/c6c69d2cd73ce89811448da5f031611df6f63bdb/"} +https://github.com/mloop/kdiff-type1-error-rate/blob/master/analysis/simulation.R {} +https://github.com/schwanbeck/YSMR {"origin":"https://github.com/schwanbeck/YSMR","visit":6,"date":"2023-08-02T15:25:02.650676+00:00","status":"full","snapshot":"a9d1c5f0bca2def198b89f65bc9f7da3be8439ed","type":"git","metadata":{},"origin_url":"https://archive.softwareheritage.org/api/1/origin/https://github.com/schwanbeck/YSMR/get/","snapshot_url":"https://archive.softwareheritage.org/api/1/snapshot/a9d1c5f0bca2def198b89f65bc9f7da3be8439ed/"} +https://github.com/lvclark/TASSELGBS_combine 
{"origin":"https://github.com/lvclark/TASSELGBS_combine","visit":1,"date":"2020-04-12T20:44:09.405589+00:00","status":"full","snapshot":"ffa6fefd3f5becefbea9fe0e6d5d93859c95c071","type":"git","metadata":{},"origin_url":"https://archive.softwareheritage.org/api/1/origin/https://github.com/lvclark/TASSELGBS_combine/get/","snapshot_url":"https://archive.softwareheritage.org/api/1/snapshot/ffa6fefd3f5becefbea9fe0e6d5d93859c95c071/"} +https://github.com/PRIDE-Toolsuite/inspector-example-files {"origin":"https://github.com/PRIDE-Toolsuite/inspector-example-files","visit":12,"date":"2021-01-25T08:54:13.394674+00:00","status":"full","snapshot":"0b56eb0ad07cf778df6dabefc4b73636e0ae8b37","type":"git","metadata":{},"origin_url":"https://archive.softwareheritage.org/api/1/origin/https://github.com/PRIDE-Toolsuite/inspector-example-files/get/","snapshot_url":"https://archive.softwareheritage.org/api/1/snapshot/0b56eb0ad07cf778df6dabefc4b73636e0ae8b37/"} +https://bitbucket.org/matwey/chelyabinsk {"origin":"https://bitbucket.org/matwey/chelyabinsk","visit":6,"date":"2021-09-24T19:32:43.322909+00:00","status":"full","snapshot":"215913858c3ee0e61e1aaea18241c5ee006da1b0","type":"hg","metadata":{},"origin_url":"https://archive.softwareheritage.org/api/1/origin/https://bitbucket.org/matwey/chelyabinsk/get/","snapshot_url":"https://archive.softwareheritage.org/api/1/snapshot/215913858c3ee0e61e1aaea18241c5ee006da1b0/"} \ No newline at end of file From ab0d70691cf4c21b886142b9a5d2b7327d6445d5 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Thu, 28 Sep 2023 20:56:18 +0300 Subject: [PATCH 16/57] Add step for archiving repoUrls to SWH --- .../common/collection/HttpClientParams.java | 4 +- .../dhp/swh/ArchiveRepositoryURLs.java | 99 +++++++++++++------ .../swh/CollectLastVisitRepositoryData.java | 31 +++--- .../swh/CollectSoftwareRepositoryURLs.java | 15 +-- .../dnetlib/dhp/swh/models/LastVisitData.java | 22 +++-- .../dnetlib/dhp/swh/utils/SWHConstants.java | 4 +- 
.../eu/dnetlib/dhp/swh/utils/SWHUtils.java | 39 ++++---- .../swh/input_archive_repository_urls.json | 24 +++++ ...ut_collect_last_visit_repository_data.json | 6 ++ .../eu/dnetlib/dhp/swh/job.properties | 4 + .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 35 +++++-- .../dhp/swh/ArchiveRepositoryURLsTest.java | 45 +++++---- .../eu/dnetlib/dhp/swh/SWHConnectionTest.java | 19 ++-- .../dhp/swh/lastVisitDataToArchive.csv | 1 + 14 files changed, 230 insertions(+), 118 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java index 55f9ceb8b..d26d9c0e9 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java @@ -53,9 +53,9 @@ public class HttpClientParams { */ private String requestMethod; - public HttpClientParams() { - this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut, new HashMap<>(), _requestMethod); + this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut, new HashMap<>(), + _requestMethod); } public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut, diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java index 7b3b74d9e..38db27baf 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java @@ -1,14 +1,16 @@ package eu.dnetlib.dhp.swh; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.common.collection.HttpClientParams; -import 
eu.dnetlib.dhp.swh.models.LastVisitData; -import eu.dnetlib.dhp.swh.utils.SWHConnection; -import eu.dnetlib.dhp.swh.utils.SWHConstants; -import eu.dnetlib.dhp.swh.utils.SWHUtils; +import static eu.dnetlib.dhp.common.Constants.REQUEST_METHOD; +import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; + +import java.io.IOException; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.FileSystem; @@ -17,14 +19,17 @@ import org.apache.hadoop.io.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.net.URL; -import java.util.Date; -import java.util.Optional; -import java.util.concurrent.TimeUnit; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.common.Constants.REQUEST_METHOD; -import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; +import eu.dnetlib.dhp.swh.models.LastVisitData; +import eu.dnetlib.dhp.swh.utils.SWHConnection; +import eu.dnetlib.dhp.swh.utils.SWHConstants; +import eu.dnetlib.dhp.swh.utils.SWHUtils; /** * Sends archive requests to the SWH API for those software repository URLs that are missing from them @@ -69,7 +74,8 @@ public class ArchiveRepositoryURLs { } - private static void archive(FileSystem fs, String inputPath, String outputPath, Integer archiveThresholdInDays) throws IOException { + private static void archive(FileSystem fs, String inputPath, String outputPath, Integer archiveThresholdInDays) + throws IOException { SequenceFile.Reader fr = 
SWHUtils.getSequenceFileReader(fs, inputPath); SequenceFile.Writer fw = SWHUtils.getSequenceFileWriter(fs, outputPath); @@ -81,7 +87,13 @@ public class ArchiveRepositoryURLs { // Read key-value pairs from the SequenceFile and handle appropriately while (fr.next(repoUrl, lastVisitData)) { - String response = handleRecord(repoUrl.toString(), lastVisitData.toString(), archiveThresholdInDays); + String response = null; + try { + response = handleRecord(repoUrl.toString(), lastVisitData.toString(), archiveThresholdInDays); + } catch (java.text.ParseException e) { + log.error("Could not handle record with repo Url: {}", repoUrl.toString()); + throw new RuntimeException(e); + } // response is equal to null when no need for request if (response != null) { @@ -95,43 +107,68 @@ public class ArchiveRepositoryURLs { fr.close(); } - public static String handleRecord(String repoUrl, String lastVisitData, Integer archiveThresholdInDays) throws IOException { - System.out.println("Key: " + repoUrl + ", Value: " + lastVisitData); + public static String handleRecord(String repoUrl, String lastVisitData, Integer archiveThresholdInDays) + throws IOException, java.text.ParseException { + + log.info("{ Key: {}, Value: {} }", repoUrl, lastVisitData); LastVisitData lastVisit = OBJECT_MAPPER.readValue(lastVisitData, LastVisitData.class); - // perform an archive request when no repoUrl was not found in previous step + // a previous attempt for archival has been made, and repository URL was not found + // avoid performing the same archive request again + if (lastVisit.getType() != null && + lastVisit.getType().equals(SWHConstants.VISIT_STATUS_NOT_FOUND)) { + + log.info("Avoid request -- previous archive request returned NOT_FOUND"); + return null; + } + + // if we have last visit data if (lastVisit.getSnapshot() != null) { - // OR last visit was before (now() - archiveThresholdInDays) - long diffInMillies = Math.abs((new Date()).getTime() - lastVisit.getDate().getTime()); - long diffInDays = 
TimeUnit.DAYS.convert(diffInMillies, TimeUnit.MILLISECONDS); + String cleanDate = GraphCleaningFunctions.cleanDate(lastVisit.getDate()); - if (archiveThresholdInDays >= diffInDays) { - return null; + // and the last visit date can be parsed + if (cleanDate != null) { + + SimpleDateFormat formatter = new SimpleDateFormat(ModelSupport.DATE_FORMAT); + Date lastVisitDate = formatter.parse(cleanDate); + + // OR last visit time < (now() - archiveThresholdInDays) + long diffInMillies = Math.abs((new Date()).getTime() - lastVisitDate.getTime()); + long diffInDays = TimeUnit.DAYS.convert(diffInMillies, TimeUnit.MILLISECONDS); + log.info("Date diff from now (in days): {}", diffInDays); + + // do not perform a request, if the last visit date is no older than $archiveThresholdInDays + if (archiveThresholdInDays >= diffInDays) { + log.info("Avoid request -- no older than {} days", archiveThresholdInDays); + return null; + } } } - // if last visit data are available, re-use version control type, else use the default one (i.e., git) + // ELSE perform an archive request + log.info("Perform archive request for: {}", repoUrl); + + // if last visit data are available, re-use version control type, + // else use the default one (i.e., git) String visitType = Optional .ofNullable(lastVisit.getType()) .orElse(SWHConstants.DEFAULT_VISIT_TYPE); URL url = new URL(String.format(SWHConstants.SWH_ARCHIVE_URL, visitType, repoUrl.trim())); - System.out.println(url.toString()); + + log.info("Sending archive request: {}", url); String response; try { response = swhConnection.call(url.toString()); } catch (CollectorException e) { - log.info("Error in request: {}", url); + log.error("Error in request: {}", url); response = "{}"; } return response; - } - - } diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java index c4b6412b5..9386b6876 100644 --- 
a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java @@ -1,12 +1,15 @@ package eu.dnetlib.dhp.swh; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.common.collection.HttpClientParams; -import eu.dnetlib.dhp.swh.utils.SWHConnection; -import eu.dnetlib.dhp.swh.utils.SWHConstants; -import eu.dnetlib.dhp.swh.utils.SWHUtils; +import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.StandardCharsets; + import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.FSDataInputStream; @@ -18,14 +21,12 @@ import org.apache.hadoop.io.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.charset.StandardCharsets; - -import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.swh.utils.SWHConnection; +import eu.dnetlib.dhp.swh.utils.SWHConstants; +import eu.dnetlib.dhp.swh.utils.SWHUtils; /** * Given a file with software repository URLs, this class @@ -107,7 +108,7 @@ public class CollectLastVisitRepositoryData { try { response = swhConnection.call(url.toString()); } catch (CollectorException e) { - log.info("Error in request: {}", url); + log.error("Error in request: {}", url); response = "{}"; } diff --git 
a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java index f93280b5e..c1a0fafa5 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java @@ -1,8 +1,11 @@ package eu.dnetlib.dhp.swh; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.Result; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +import java.io.Serializable; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; @@ -12,10 +15,8 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Serializable; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Result; /** * Collects unique software repository URLs in the Graph using Hive @@ -69,7 +70,7 @@ public class CollectSoftwareRepositoryURLs implements Serializable { "WHERE coderepositoryurl.value IS NOT NULL " + "AND datainfo.deletedbyinference = FALSE " + "AND datainfo.invisible = FALSE " + - "LIMIT 1000"; // TODO remove + "LIMIT 1000"; String query = String.format(queryTemplate, hiveDbName); log.info("Hive query to fetch software code URLs: {}", query); diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java index b8cd6de6e..eaff5ce02 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java +++ 
b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java @@ -1,21 +1,23 @@ package eu.dnetlib.dhp.swh.models; +import java.util.Date; + +import com.cloudera.com.fasterxml.jackson.annotation.JsonFormat; import com.cloudera.com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import java.util.Date; - @JsonIgnoreProperties(ignoreUnknown = true) public class LastVisitData { private String type; - - private Date date; + private String date; @JsonProperty("snapshot") private String snapshotId; + private String status; + public String getType() { return type; } @@ -24,11 +26,11 @@ public class LastVisitData { this.type = type; } - public Date getDate() { + public String getDate() { return date; } - public void setDate(Date date) { + public void setDate(String date) { this.date = date; } @@ -39,4 +41,12 @@ public class LastVisitData { public void setSnapshot(String snapshotId) { this.snapshotId = snapshotId; } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } } diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java index 1299bc805..f58705188 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java @@ -6,8 +6,10 @@ public class SWHConstants { public static final String SWH_ARCHIVE_URL = "https://archive.softwareheritage.org/api/1/origin/save/%s/url/%s/"; - public static final String ACCESS_TOKEN = ""; + public static final String ACCESS_TOKEN = 
"eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJhMTMxYTQ1My1hM2IyLTQwMTUtODQ2Ny05MzAyZjk3MTFkOGEifQ.eyJpYXQiOjE2OTQ2MzYwMjAsImp0aSI6IjkwZjdkNTNjLTQ5YTktNGFiMy1hY2E0LTcwMTViMjEyZTNjNiIsImlzcyI6Imh0dHBzOi8vYXV0aC5zb2Z0d2FyZWhlcml0YWdlLm9yZy9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwiYXVkIjoiaHR0cHM6Ly9hdXRoLnNvZnR3YXJlaGVyaXRhZ2Uub3JnL2F1dGgvcmVhbG1zL1NvZnR3YXJlSGVyaXRhZ2UiLCJzdWIiOiIzMTY5OWZkNC0xNmE0LTQxOWItYTdhMi00NjI5MDY4ZjI3OWEiLCJ0eXAiOiJPZmZsaW5lIiwiYXpwIjoic3doLXdlYiIsInNlc3Npb25fc3RhdGUiOiIzMjYzMzEwMS00ZDRkLTQwMjItODU2NC1iMzNlMTJiNTE3ZDkiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyBwcm9maWxlIGVtYWlsIn0.XHj1VIZu1dZ4Ej32-oU84mFmaox9cLNjXosNxwZM0Xs"; public static final String DEFAULT_VISIT_TYPE = "git"; + public static final String VISIT_STATUS_NOT_FOUND = "not_found"; + } diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java index 8200e7b34..405ce51e4 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java @@ -1,8 +1,14 @@ package eu.dnetlib.dhp.swh.utils; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.HttpClientParams; +import static eu.dnetlib.dhp.common.Constants.*; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -11,13 +17,8 @@ import org.apache.hadoop.io.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.Constants.*; +import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.HttpClientParams; public class SWHUtils { @@ -51,10 +52,10 @@ public class SWHUtils { log.info("retryDelay is {}", clientParams.getRetryDelay()); clientParams - .setRequestMethod( - Optional - .ofNullable(argumentParser.get(REQUEST_METHOD)) - .orElse(HttpClientParams._requestMethod)); + .setRequestMethod( + Optional + .ofNullable(argumentParser.get(REQUEST_METHOD)) + .orElse(HttpClientParams._requestMethod)); log.info("requestMethod is {}", clientParams.getRequestMethod()); return clientParams; @@ -63,16 +64,16 @@ public class SWHUtils { public static BufferedReader getFileReader(FileSystem fs, Path inputPath) throws IOException { FSDataInputStream inputStream = fs.open(inputPath); return new BufferedReader( - new InputStreamReader(inputStream, StandardCharsets.UTF_8)); + new InputStreamReader(inputStream, StandardCharsets.UTF_8)); } public static SequenceFile.Writer getSequenceFileWriter(FileSystem fs, String outputPath) throws IOException { return SequenceFile - .createWriter( - fs.getConf(), - SequenceFile.Writer.file(new Path(outputPath)), - SequenceFile.Writer.keyClass(Text.class), - SequenceFile.Writer.valueClass(Text.class)); + .createWriter( + fs.getConf(), + SequenceFile.Writer.file(new Path(outputPath)), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class)); } public static SequenceFile.Reader getSequenceFileReader(FileSystem fs, String inputPath) throws IOException { diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json index 5ec481305..ce80d6f4a 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json @@ -11,12 +11,36 @@ 
"paramDescription": "the URL where to store last visits data", "paramRequired": true }, + { + "paramName": "arp", + "paramLongName": "archiveRequestsPath", + "paramDescription": "the URL where to store the responses of the archive requests", + "paramRequired": true + }, + { + "paramName": "mnr", + "paramLongName": "maxNumberOfRetry", + "paramDescription": "the maximum number of admitted connection retries", + "paramRequired": false + }, { "paramName": "rqd", "paramLongName": "requestDelay", "paramDescription": "the delay (ms) between requests", "paramRequired": false }, + { + "paramName": "rtd", + "paramLongName": "retryDelay", + "paramDescription": "the delay (ms) between retries", + "paramRequired": false + }, + { + "paramName": "rm", + "paramLongName": "requestMethod", + "paramDescription": "the method of the requests to perform", + "paramRequired": false + }, { "paramName": "atid", "paramLongName": "archiveThresholdInDays", diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json index 6c59123be..8bf41f0ae 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json @@ -34,5 +34,11 @@ "paramLongName": "retryDelay", "paramDescription": "the delay (ms) between retries", "paramRequired": false + }, + { + "paramName": "rm", + "paramLongName": "requestMethod", + "paramDescription": "the method of the requests to perform", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties index e2c2af852..4cc1c1e25 100644 --- 
a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties @@ -8,4 +8,8 @@ softwareCodeRepositoryURLs=${workingDir}/1_code_repo_urls.csv lastVisitsPath=${workingDir}/2_last_visits.seq archiveRequestsPath=${workingDir}/3_archive_requests.seq +maxNumberOfRetry=2 +retryDelay=1 +requestDelay=100 + resume=collect-software-repository-urls diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index 5062d562b..b89165fa2 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -8,7 +8,27 @@ softwareCodeRepositoryURLs - The path in the HDSF to save the software repository URLs + The path in the HDFS to save the software repository URLs + + + lastVisitsPath + The path in the HDFS to save the responses of the last visit requests + + + archiveRequestsPath + The path in the HDFS to save the responses of the archive requests + + + maxNumberOfRetry + Max number of retries for failed API calls + + + retryDelay + Retry delay for failed requests (in sec) + + + requestDelay + Delay between API requests (in ms) resume @@ -75,9 +95,9 @@ --softwareCodeRepositoryURLs${softwareCodeRepositoryURLs} --lastVisitsPath${lastVisitsPath} - --maxNumberOfRetry2 - --requestDelay0 - --retryDelay1 + --maxNumberOfRetry${maxNumberOfRetry} + --requestDelay${requestDelay} + --retryDelay${retryDelay} --requestMethodGET @@ -91,11 +111,12 @@ --namenode${nameNode} --lastVisitsPath${lastVisitsPath} + --archiveRequestsPath${archiveRequestsPath} --archiveThresholdInDays365 - --maxNumberOfRetry2 - --requestDelay0 - --retryDelay1 + --maxNumberOfRetry${maxNumberOfRetry} + --requestDelay${requestDelay} + --retryDelay${retryDelay} --requestMethodPOST diff --git 
a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLsTest.java b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLsTest.java index 06e40ae14..e069e9655 100644 --- a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLsTest.java +++ b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLsTest.java @@ -1,35 +1,38 @@ -package eu.dnetlib.dhp.swh; -import eu.dnetlib.dhp.swh.utils.SWHUtils; -import org.apache.hadoop.fs.FileSystem; -import org.junit.jupiter.api.Test; +package eu.dnetlib.dhp.swh; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; +import java.text.ParseException; import java.util.Arrays; +import org.apache.hadoop.fs.FileSystem; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.swh.utils.SWHUtils; + public class ArchiveRepositoryURLsTest { - @Test - void testArchive() throws IOException { - String inputPath = getClass() - .getResource("/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv") - .getPath(); + @Test + void testArchive() throws IOException, ParseException { + String inputPath = getClass() + .getResource("/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv") + .getPath(); - File file = new File(inputPath); - FileReader fr = new FileReader(file); - BufferedReader br = new BufferedReader(fr); //creates a buffering character input stream + File file = new File(inputPath); + FileReader fr = new FileReader(file); + BufferedReader br = new BufferedReader(fr); // creates a buffering character input stream - String line; - while((line = br.readLine()) != null) { - String[] tokens = line.split("\t"); + String line; + while ((line = br.readLine()) != null) { + String[] tokens = line.split("\t"); - String response = ArchiveRepositoryURLs.handleRecord(tokens[0], tokens[1], 365); - System.out.println(tokens[0] + "\t" + response); - System.out.println(); - } - fr.close(); - } + String response = 
ArchiveRepositoryURLs.handleRecord(tokens[0], tokens[1], 365); + System.out.println(tokens[0] + "\t" + response); + System.out.println(); + } + fr.close(); + } } diff --git a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java index d69f6ff1b..28210f1b3 100644 --- a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java +++ b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java @@ -1,17 +1,18 @@ package eu.dnetlib.dhp.swh; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; + +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.swh.utils.SWHConnection; import eu.dnetlib.dhp.swh.utils.SWHConstants; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; //import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -24,7 +25,7 @@ public class SWHConnectionTest { HttpClientParams clientParams = new HttpClientParams(); clientParams.setRequestMethod("GET"); - SWHConnection swhConnection = new SWHConnection(clientParams); + SWHConnection swhConnection = new SWHConnection(clientParams); String repoUrl = "https://github.com/stanford-futuredata/FAST"; URL url = new URL(String.format(SWHConstants.SWH_LATEST_VISIT_URL, repoUrl)); @@ -42,7 +43,7 @@ public class SWHConnectionTest { HttpClientParams clientParams = new HttpClientParams(); clientParams.setRequestMethod("POST"); - SWHConnection swhConnection = new SWHConnection(clientParams); + SWHConnection swhConnection = new SWHConnection(clientParams); String repoUrl = "https://github.com/stanford-futuredata/FAST"; URL url = new 
URL(String.format(SWHConstants.SWH_ARCHIVE_URL, SWHConstants.DEFAULT_VISIT_TYPE, repoUrl)); diff --git a/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv b/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv index 6477dd62a..568ccf482 100644 --- a/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv +++ b/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/lastVisitDataToArchive.csv @@ -1,3 +1,4 @@ +https://bitbucket.org/samskillman/yt-stokes {"origin":"https://bitbucket.org/samskillman/yt-stokes","visit":43,"date":"2021-09-13T21:59:27.125171+00:00","status":"failed","snapshot":null,"type":"hg","metadata":{},"origin_url":"https://archive.softwareheritage.org/api/1/origin/https://bitbucket.org/samskillman/yt-stokes/get/","snapshot_url":null} https://github.com/bioinsilico/BIPSPI {"origin":"https://github.com/bioinsilico/BIPSPI","visit":1,"date":"2020-03-18T14:50:21.541822+00:00","status":"full","snapshot":"c6c69d2cd73ce89811448da5f031611df6f63bdb","type":"git","metadata":{},"origin_url":"https://archive.softwareheritage.org/api/1/origin/https://github.com/bioinsilico/BIPSPI/get/","snapshot_url":"https://archive.softwareheritage.org/api/1/snapshot/c6c69d2cd73ce89811448da5f031611df6f63bdb/"} https://github.com/mloop/kdiff-type1-error-rate/blob/master/analysis/simulation.R {} https://github.com/schwanbeck/YSMR {"origin":"https://github.com/schwanbeck/YSMR","visit":6,"date":"2023-08-02T15:25:02.650676+00:00","status":"full","snapshot":"a9d1c5f0bca2def198b89f65bc9f7da3be8439ed","type":"git","metadata":{},"origin_url":"https://archive.softwareheritage.org/api/1/origin/https://github.com/schwanbeck/YSMR/get/","snapshot_url":"https://archive.softwareheritage.org/api/1/snapshot/a9d1c5f0bca2def198b89f65bc9f7da3be8439ed/"} From e84f5b5e6457d8e47fea99a860cd27f41c67882e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Oct 2023 09:25:16 +0200 Subject: 
[PATCH 17/57] extended existing codo to accomodate import of POCI from open citation --- .../CreateActionSetSparkJob.java | 29 +++++++++++++------ .../opencitations/GetOpenCitationsRefs.java | 16 ++++++---- .../actionmanager/opencitations/ReadCOCI.java | 18 +++++++++--- .../opencitations/as_parameters.json | 5 ++++ .../opencitations/input_parameters.json | 6 ++++ .../input_readcoci_parameters.json | 7 ++++- .../opencitations/oozie_app/workflow.xml | 9 ++++-- 7 files changed, 67 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index 4c658e52f..dafd82120 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -26,7 +26,6 @@ import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -35,7 +34,9 @@ import scala.Tuple2; public class CreateActionSetSparkJob implements Serializable { public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations"; public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations"; - private static final String ID_PREFIX = "50|doi_________::"; + private static final String DOI_PREFIX = "50|doi_________::"; + + private static final String PMID_PREFIX = "50|pmid________::"; private static final String TRUST = "0.91"; 
private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class); @@ -67,6 +68,9 @@ public class CreateActionSetSparkJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath {}", outputPath); + final String prefix = parser.get("prefix"); + log.info("prefix {}", prefix); + final boolean shouldDuplicateRels = Optional .ofNullable(parser.get("shouldDuplicateRels")) .map(Boolean::valueOf) @@ -77,13 +81,13 @@ public class CreateActionSetSparkJob implements Serializable { conf, isSparkSessionManaged, spark -> { - extractContent(spark, inputPath, outputPath, shouldDuplicateRels); + extractContent(spark, inputPath, outputPath, shouldDuplicateRels, prefix); }); } private static void extractContent(SparkSession spark, String inputPath, String outputPath, - boolean shouldDuplicateRels) { + boolean shouldDuplicateRels, String prefix) { spark .read() .textFile(inputPath + "/*") @@ -91,7 +95,8 @@ public class CreateActionSetSparkJob implements Serializable { (MapFunction) value -> OBJECT_MAPPER.readValue(value, COCI.class), Encoders.bean(COCI.class)) .flatMap( - (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), + (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels, prefix) + .iterator(), Encoders.bean(Relation.class)) .filter((FilterFunction) value -> value != null) .toJavaRDD() @@ -103,13 +108,19 @@ public class CreateActionSetSparkJob implements Serializable { } - private static List createRelation(COCI value, boolean duplicate) { + private static List createRelation(COCI value, boolean duplicate, String p) { List relationList = new ArrayList<>(); + String prefix; + if (p.equals("COCI")) { + prefix = DOI_PREFIX; + } else { + prefix = PMID_PREFIX; + } - String citing = ID_PREFIX + String citing = prefix + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting())); - final String cited = ID_PREFIX + final String cited = prefix + 
IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); if (!citing.equals(cited)) { @@ -120,7 +131,7 @@ public class CreateActionSetSparkJob implements Serializable { cited, ModelConstants.CITES)); if (duplicate && value.getCiting().endsWith(".refs")) { - citing = ID_PREFIX + IdentifierFactory + citing = prefix + IdentifierFactory .md5( CleaningFunctions .normalizePidValue( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java index 3530c9980..60dc998ef 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java @@ -45,6 +45,9 @@ public class GetOpenCitationsRefs implements Serializable { final String hdfsNameNode = parser.get("hdfsNameNode"); log.info("hdfsNameNode {}", hdfsNameNode); + final String prefix = parser.get("prefix"); + log.info("prefix {}", prefix); + Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); @@ -53,30 +56,31 @@ public class GetOpenCitationsRefs implements Serializable { GetOpenCitationsRefs ocr = new GetOpenCitationsRefs(); for (String file : inputFile) { - ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem); + ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem, prefix); } } - private void doExtract(String inputFile, String workingPath, FileSystem fileSystem) + private void doExtract(String inputFile, String workingPath, FileSystem fileSystem, String prefix) throws IOException { final Path path = new Path(inputFile); FSDataInputStream oc_zip = fileSystem.open(path); - int count = 1; + // int count = 1; try (ZipInputStream zis = new ZipInputStream(oc_zip)) { ZipEntry entry = 
null; while ((entry = zis.getNextEntry()) != null) { if (!entry.isDirectory()) { String fileName = entry.getName(); - fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count; - count++; + // fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count; + fileName = fileName.substring(0, fileName.lastIndexOf(".")); + // count++; try ( FSDataOutputStream out = fileSystem - .create(new Path(workingPath + "/COCI/" + fileName + ".gz")); + .create(new Path(workingPath + "/" + prefix + "/" + fileName + ".gz")); GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { IOUtils.copy(zis, gzipOs); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java index 4293ca187..3d384de9d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -49,6 +49,9 @@ public class ReadCOCI implements Serializable { final String workingPath = parser.get("workingPath"); log.info("workingPath {}", workingPath); + final String format = parser.get("format"); + log.info("format {}", format); + SparkConf sconf = new SparkConf(); final String delimiter = Optional @@ -64,13 +67,14 @@ public class ReadCOCI implements Serializable { workingPath, inputFile, outputPath, - delimiter); + delimiter, + format); }); } private static void doRead(SparkSession spark, String workingPath, String[] inputFiles, String outputPath, - String delimiter) throws IOException { + String delimiter, String format) throws IOException { for (String inputFile : inputFiles) { String p_string = workingPath + "/" + inputFile + ".gz"; @@ -87,9 +91,15 @@ public class ReadCOCI implements Serializable { cociData.map((MapFunction) row -> { COCI coci = new COCI(); + if 
(format.equals("COCI")) { + coci.setCiting(row.getString(1)); + coci.setCited(row.getString(2)); + } else { + coci.setCiting(String.valueOf(row.getInt(1))); + coci.setCited(String.valueOf(row.getInt(2))); + } coci.setOci(row.getString(0)); - coci.setCiting(row.getString(1)); - coci.setCited(row.getString(2)); + return coci; }, Encoders.bean(COCI.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json index 308e02026..e25d1f4b8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json @@ -21,5 +21,10 @@ "paramLongName": "shouldDuplicateRels", "paramDescription": "the hdfs name node", "paramRequired": false +},{ + "paramName": "p", + "paramLongName": "prefix", + "paramDescription": "the hdfs name node", + "paramRequired": true } ] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json index 4910ad11d..96db7eeb7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json @@ -16,5 +16,11 @@ "paramLongName": "hdfsNameNode", "paramDescription": "the hdfs name node", "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "prefix", + "paramDescription": "COCI or POCI", + "paramRequired": true } ] diff --git 
a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json index b57cb5d9a..fa840089d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_readcoci_parameters.json @@ -30,7 +30,12 @@ "paramLongName": "inputFile", "paramDescription": "the hdfs name node", "paramRequired": true - } + }, { + "paramName": "f", + "paramLongName": "format", + "paramDescription": "the hdfs name node", + "paramRequired": true +} ] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index 0f01039f7..4807da903 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -60,6 +60,7 @@ --hdfsNameNode${nameNode} --inputFile${inputFile} --workingPath${workingPath} + --prefix${prefix} @@ -82,10 +83,11 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --workingPath${workingPath}/COCI - --outputPath${workingPath}/COCI_JSON/ + --workingPath${workingPath}/${prefix} + --outputPath${workingPath}/${prefix}_JSON/ --delimiter${delimiter} --inputFile${inputFileCoci} + --format${prefix} @@ -108,8 +110,9 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --inputPath${workingPath}/COCI_JSON + 
--inputPath${workingPath}/${prefix}_JSON --outputPath${outputPath} + --prefix${prefix} From e239b81740bccfb90211464b5d422113eac2b783 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 20 Sep 2023 15:42:00 +0200 Subject: [PATCH 18/57] Fix defect #8997: GenerateEventsJob is generating huge amounts of logs because broker entity similarity calculation consistently failed --- .../eu/dnetlib/pace/model/SparkModel.scala | 4 ++-- .../dhp/broker/oa/util/TrustUtils.java | 22 +++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala index 95325ace0..3ba36aa22 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala @@ -78,10 +78,10 @@ case class SparkModel(conf: DedupConfig) { uv case Type.List | Type.JSON => - MapDocumentUtil.truncateList( + Seq(MapDocumentUtil.truncateList( MapDocumentUtil.getJPathList(fdef.getPath, documentContext, fdef.getType), fdef.getSize - ).toArray + )) case Type.StringConcat => val jpaths = CONCAT_REGEX.split(fdef.getPath) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index a6fa2b1a1..6f197a8ce 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -1,18 +1,18 @@ package eu.dnetlib.dhp.broker.oa.util; -import java.io.IOException; - -import org.apache.spark.sql.Row; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.pace.config.DedupConfig; import 
eu.dnetlib.pace.model.SparkDeduper; import eu.dnetlib.pace.tree.support.TreeProcessor; +import org.apache.commons.io.IOUtils; +import org.apache.spark.sql.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; public class TrustUtils { @@ -27,10 +27,8 @@ public class TrustUtils { static { mapper = new ObjectMapper(); try { - dedupConfig = mapper - .readValue( - DedupConfig.class.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"), - DedupConfig.class); + dedupConfig = DedupConfig.load(IOUtils.toString(DedupConfig.class.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"), StandardCharsets.UTF_8)); + deduper = new SparkDeduper(dedupConfig); } catch (final IOException e) { log.error("Error loading dedupConfig, e"); @@ -57,7 +55,7 @@ public class TrustUtils { return TrustUtils.rescale(score, threshold); } catch (final Exception e) { log.error("Error computing score between results", e); - return BrokerConstants.MIN_TRUST; + throw new RuntimeException(e); } } From 7f244d9a7ad7e723d9c80503476e590835031c4d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 20 Sep 2023 15:53:21 +0200 Subject: [PATCH 19/57] code formatting --- .../dhp/broker/oa/util/TrustUtils.java | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index 6f197a8ce..67468c6f9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -1,18 +1,20 @@ package eu.dnetlib.dhp.broker.oa.util; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.broker.objects.OaBrokerMainEntity; -import 
eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.model.SparkDeduper; -import eu.dnetlib.pace.tree.support.TreeProcessor; +import java.io.IOException; +import java.nio.charset.StandardCharsets; + import org.apache.commons.io.IOUtils; import org.apache.spark.sql.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.charset.StandardCharsets; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.broker.objects.OaBrokerMainEntity; +import eu.dnetlib.pace.config.DedupConfig; +import eu.dnetlib.pace.model.SparkDeduper; +import eu.dnetlib.pace.tree.support.TreeProcessor; public class TrustUtils { @@ -27,7 +29,13 @@ public class TrustUtils { static { mapper = new ObjectMapper(); try { - dedupConfig = DedupConfig.load(IOUtils.toString(DedupConfig.class.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"), StandardCharsets.UTF_8)); + dedupConfig = DedupConfig + .load( + IOUtils + .toString( + DedupConfig.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"), + StandardCharsets.UTF_8)); deduper = new SparkDeduper(dedupConfig); } catch (final IOException e) { From 3c47920c7839b2004a2e8f9fbb4dbff8873d1e6d Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 20 Sep 2023 16:14:01 +0200 Subject: [PATCH 20/57] Use asScala to convert java List to Scala Sequence --- .../src/main/java/eu/dnetlib/pace/model/SparkModel.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala index 3ba36aa22..aa997c6e9 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala @@ -78,10 +78,10 @@ case class SparkModel(conf: DedupConfig) { uv case Type.List | Type.JSON => - Seq(MapDocumentUtil.truncateList( + 
MapDocumentUtil.truncateList( MapDocumentUtil.getJPathList(fdef.getPath, documentContext, fdef.getType), fdef.getSize - )) + ).asScala case Type.StringConcat => val jpaths = CONCAT_REGEX.split(fdef.getPath) From c412dc162bc7028c1e94eb3205960e24e748347e Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 27 Sep 2023 22:30:47 +0200 Subject: [PATCH 21/57] Fix bug in conversion from dedup json model to Spark Dataset of Rows: list of strings contained the json escaped representation of the value instead of the plain value, this caused instanceTypeMatch failures because of the leading and trailing double quotes --- .../src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java index a59b6248b..28244cb3b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java @@ -117,6 +117,11 @@ public class MapDocumentUtil { return result; } + if (type == Type.List && jresult instanceof List) { + ((List) jresult).forEach(x -> result.add(x.toString())); + return result; + } + if (jresult instanceof JSONArray) { ((JSONArray) jresult).forEach(it -> { try { From 9898470b0e2c5b05ae73c5b2ae404f6f52cba2f4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Oct 2023 12:54:16 +0200 Subject: [PATCH 22/57] Addressing comments in https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/340\#issuecomment-10592 --- .../CreateActionSetSparkJob.java | 119 ++++++++---------- .../opencitations/oozie_app/workflow.xml | 2 +- 2 files changed, 54 insertions(+), 67 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index dafd82120..e3a9833b3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -12,6 +12,7 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; @@ -26,9 +27,12 @@ import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; public class CreateActionSetSparkJob implements Serializable { @@ -68,9 +72,6 @@ public class CreateActionSetSparkJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath {}", outputPath); - final String prefix = parser.get("prefix"); - log.info("prefix {}", prefix); - final boolean shouldDuplicateRels = Optional .ofNullable(parser.get("shouldDuplicateRels")) .map(Boolean::valueOf) @@ -81,47 +82,62 @@ public class CreateActionSetSparkJob implements Serializable { conf, isSparkSessionManaged, spark -> { - extractContent(spark, inputPath, outputPath, shouldDuplicateRels, prefix); + 
extractContent(spark, inputPath, outputPath, shouldDuplicateRels); }); } private static void extractContent(SparkSession spark, String inputPath, String outputPath, + boolean shouldDuplicateRels) { + + getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, "COCI") + .union(getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, "POCI")) + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); + + } + + private static JavaPairRDD getTextTextJavaPairRDD(SparkSession spark, String inputPath, boolean shouldDuplicateRels, String prefix) { - spark + return spark .read() - .textFile(inputPath + "/*") + .textFile(inputPath + "/" + prefix + "/" + prefix + "_JSON/*") .map( (MapFunction) value -> OBJECT_MAPPER.readValue(value, COCI.class), Encoders.bean(COCI.class)) .flatMap( - (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels, prefix) - .iterator(), + (FlatMapFunction) value -> createRelation( + value, shouldDuplicateRels, prefix) + .iterator(), Encoders.bean(Relation.class)) .filter((FilterFunction) value -> value != null) .toJavaRDD() .map(p -> new AtomicAction(p.getClass(), p)) .mapToPair( aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), - new Text(OBJECT_MAPPER.writeValueAsString(aa)))) - .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); - + new Text(OBJECT_MAPPER.writeValueAsString(aa)))); } private static List createRelation(COCI value, boolean duplicate, String p) { List relationList = new ArrayList<>(); String prefix; + String citing; + String cited; if (p.equals("COCI")) { prefix = DOI_PREFIX; + citing = prefix + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting())); + cited = prefix + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); + } else { prefix = PMID_PREFIX; - } + citing = prefix + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("pmid", value.getCiting())); + 
cited = prefix + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("pmid", value.getCited())); - String citing = prefix - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting())); - final String cited = prefix - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); + } if (!citing.equals(cited)) { relationList @@ -143,59 +159,30 @@ public class CreateActionSetSparkJob implements Serializable { return relationList; } - private static Collection getRelations(String citing, String cited) { - - return Arrays - .asList( - getRelation(citing, cited, ModelConstants.CITES), - getRelation(cited, citing, ModelConstants.IS_CITED_BY)); - } - public static Relation getRelation( String source, String target, String relclass) { - Relation r = new Relation(); - r.setCollectedfrom(getCollectedFrom()); - r.setSource(source); - r.setTarget(target); - r.setRelClass(relclass); - r.setRelType(ModelConstants.RESULT_RESULT); - r.setSubRelType(ModelConstants.CITATION); - r - .setDataInfo( - getDataInfo()); - return r; + + return OafMapperUtils + .getRelation( + source, + target, + ModelConstants.RESULT_RESULT, + ModelConstants.CITATION, + relclass, + Arrays + .asList( + OafMapperUtils.keyValue(ModelConstants.OPENOCITATIONS_ID, ModelConstants.OPENOCITATIONS_NAME)), + OafMapperUtils + .dataInfo( + false, null, false, false, + OafMapperUtils + .qualifier( + OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), + TRUST), + null); + } - - public static List getCollectedFrom() { - KeyValue kv = new KeyValue(); - kv.setKey(ModelConstants.OPENOCITATIONS_ID); - kv.setValue(ModelConstants.OPENOCITATIONS_NAME); - - return Arrays.asList(kv); - } - - public static DataInfo getDataInfo() { - DataInfo di = new DataInfo(); - di.setInferred(false); - di.setDeletedbyinference(false); - di.setTrust(TRUST); - - di - .setProvenanceaction( - 
getQualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS)); - return di; - } - - public static Qualifier getQualifier(String class_id, String class_name, - String qualifierSchema) { - Qualifier pa = new Qualifier(); - pa.setClassid(class_id); - pa.setClassname(class_name); - pa.setSchemeid(qualifierSchema); - pa.setSchemename(qualifierSchema); - return pa; - } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index 4807da903..bd1932dd5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -110,7 +110,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --inputPath${workingPath}/${prefix}_JSON + --inputPath${workingPath} --outputPath${outputPath} --prefix${prefix} From d7fccdc64b53a80c5f93be8cf4826c0d01f1e3ff Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Oct 2023 14:10:57 +0200 Subject: [PATCH 23/57] fixed paths in wf to match the req of the pathname --- .../actionmanager/opencitations/oozie_app/workflow.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index bd1932dd5..d87dfa2ba 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -46,7 +46,7 @@ download.sh ${filelist} - ${workingPath}/Original + ${workingPath}/${prefix}/Original HADOOP_USER_NAME=${wf:user()} download.sh @@ -59,7 +59,7 @@ eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs --hdfsNameNode${nameNode} --inputFile${inputFile} - --workingPath${workingPath} + --workingPath${workingPath}/${prefix} --prefix${prefix} @@ -83,8 +83,8 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --workingPath${workingPath}/${prefix} - --outputPath${workingPath}/${prefix}_JSON/ + --workingPath${workingPath}/${prefix}/${prefix} + --outputPath${workingPath}/${prefix}/${prefix}_JSON/ --delimiter${delimiter} --inputFile${inputFileCoci} --format${prefix} From c9a5ad6a0266de84a0be5efc493b686586705218 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 Oct 2023 16:28:42 +0200 Subject: [PATCH 24/57] extending the coverage of the peer non-unknown refereed instances --- .../oaf/utils/GraphCleaningFunctions.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 8afa41f95..3c3e8052e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -36,6 +36,19 @@ public class GraphCleaningFunctions extends CleaningFunctions { public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5; private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+"; + private static final HashSet PEER_REVIEWED_TYPES = new HashSet<>(); + + static { + PEER_REVIEWED_TYPES.add("Article"); + PEER_REVIEWED_TYPES.add("Part of book or chapter of book"); + 
PEER_REVIEWED_TYPES.add("Book"); + PEER_REVIEWED_TYPES.add("Doctoral thesis"); + PEER_REVIEWED_TYPES.add("Master thesis"); + PEER_REVIEWED_TYPES.add("Data Paper"); + PEER_REVIEWED_TYPES.add("Thesis"); + PEER_REVIEWED_TYPES.add("Bachelor thesis"); + PEER_REVIEWED_TYPES.add("Conference object"); + } public static T cleanContext(T value, String contextId, String verifyParam) { if (ModelSupport.isSubClass(value, Result.class)) { @@ -493,6 +506,28 @@ public class GraphCleaningFunctions extends CleaningFunctions { if (Objects.isNull(i.getRefereed()) || StringUtils.isBlank(i.getRefereed().getClassid())) { i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS)); } + + // from the script from Dimitris + if ("0000".equals(i.getRefereed().getClassid())) { + final boolean isFromCrossref = ModelConstants.CROSSREF_ID + .equals(i.getCollectedfrom().getKey()); + final boolean hasDoi = i + .getPid() + .stream() + .anyMatch(pid -> PidType.doi.toString().equals(pid.getQualifier().getClassid())); + final boolean isPeerReviewedType = PEER_REVIEWED_TYPES + .contains(i.getInstancetype().getClassname()); + final boolean noOtherLitType = r + .getInstance() + .stream() + .noneMatch(ii -> "Other literature type".equals(ii.getInstancetype().getClassname())); + if (isFromCrossref && hasDoi && isPeerReviewedType && noOtherLitType) { + i.setRefereed(qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS)); + } else { + i.setRefereed(qualifier("0002", "nonPeerReviewed", ModelConstants.DNET_REVIEW_LEVELS)); + } + } + if (Objects.nonNull(i.getDateofacceptance())) { Optional date = cleanDateField(i.getDateofacceptance()); if (date.isPresent()) { From 839a8524e70790440bb570c5093f0ab43e0df323 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Mon, 2 Oct 2023 23:50:38 +0300 Subject: [PATCH 25/57] Add action for creating actionsets --- .../dhp/swh/ArchiveRepositoryURLs.java | 5 +- .../swh/CollectLastVisitRepositoryData.java | 2 +- 
.../swh/CollectSoftwareRepositoryURLs.java | 9 +- .../dnetlib/dhp/swh/PrepareSWHActionsets.java | 177 ++++++++++++++++++ .../dnetlib/dhp/swh/models/LastVisitData.java | 25 ++- .../dnetlib/dhp/swh/utils/SWHConnection.java | 100 ---------- .../dnetlib/dhp/swh/utils/SWHConstants.java | 4 + .../dhp/swh/input_prepare_swh_actionsets.json | 26 +++ .../eu/dnetlib/dhp/swh/job.properties | 2 + .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 27 +++ .../dhp/swh/PrepareSWHActionsetsTest.java | 97 ++++++++++ .../eu/dnetlib/dhp/swh/last_visits_data.seq | Bin 0 -> 253188 bytes .../eu/dnetlib/dhp/swh/software.json.gz | Bin 0 -> 16127 bytes 13 files changed, 362 insertions(+), 112 deletions(-) create mode 100644 dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java create mode 100644 dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_prepare_swh_actionsets.json create mode 100644 dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/PrepareSWHActionsetsTest.java create mode 100644 dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/last_visits_data.seq create mode 100644 dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/software.json.gz diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java index 38db27baf..f02861953 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.swh; -import static eu.dnetlib.dhp.common.Constants.REQUEST_METHOD; import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; import java.io.IOException; @@ -116,8 +115,8 @@ public class ArchiveRepositoryURLs { // a previous attempt for archival has been made, and repository URL was not found // avoid performing the same archive request again - if 
(lastVisit.getType() != null && - lastVisit.getType().equals(SWHConstants.VISIT_STATUS_NOT_FOUND)) { + if (lastVisit.getStatus() != null && + lastVisit.getStatus().equals(SWHConstants.VISIT_STATUS_NOT_FOUND)) { log.info("Avoid request -- previous archive request returned NOT_FOUND"); return null; diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java index 9386b6876..296a4cce1 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java @@ -40,7 +40,7 @@ public class CollectLastVisitRepositoryData { private static SWHConnection swhConnection = null; public static void main(final String[] args) - throws IOException, ParseException, InterruptedException, URISyntaxException, CollectorException { + throws IOException, ParseException { final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java index c1a0fafa5..6232fa322 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java @@ -3,7 +3,6 @@ package eu.dnetlib.dhp.swh; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.io.Serializable; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -23,7 +22,7 @@ import eu.dnetlib.dhp.schema.oaf.Result; * * @author Serafeim Chatzopoulos */ -public class CollectSoftwareRepositoryURLs implements Serializable { +public class CollectSoftwareRepositoryURLs { private static final 
Logger log = LoggerFactory.getLogger(CollectSoftwareRepositoryURLs.class); @@ -44,10 +43,10 @@ public class CollectSoftwareRepositoryURLs implements Serializable { log.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String hiveDbName = parser.get("hiveDbName"); - log.info("hiveDbName {}: ", hiveDbName); + log.info("hiveDbName: {}", hiveDbName); final String outputPath = parser.get("softwareCodeRepositoryURLs"); - log.info("softwareCodeRepositoryURLs {}: ", outputPath); + log.info("softwareCodeRepositoryURLs: {}", outputPath); final String hiveMetastoreUris = parser.get("hiveMetastoreUris"); log.info("hiveMetastoreUris: {}", hiveMetastoreUris); @@ -70,7 +69,7 @@ public class CollectSoftwareRepositoryURLs implements Serializable { "WHERE coderepositoryurl.value IS NOT NULL " + "AND datainfo.deletedbyinference = FALSE " + "AND datainfo.invisible = FALSE " + - "LIMIT 1000"; + "LIMIT 5000"; String query = String.format(queryTemplate, hiveDbName); log.info("Hive query to fetch software code URLs: {}", query); diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java new file mode 100644 index 000000000..c0ab11bc4 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java @@ -0,0 +1,177 @@ + +package eu.dnetlib.dhp.swh; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static org.apache.spark.sql.functions.col; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import 
org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.swh.models.LastVisitData; +import eu.dnetlib.dhp.swh.utils.SWHConstants; +import scala.Tuple2; + +/** + * Creates action sets for Software Heritage data + * + * @author Serafeim Chatzopoulos + */ +public class PrepareSWHActionsets { + + private static final Logger log = LoggerFactory.getLogger(PrepareSWHActionsets.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareSWHActionsets.class + .getResourceAsStream( + "/eu/dnetlib/dhp/swh/input_prepare_swh_actionsets.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("lastVisitsPath"); + log.info("inputPath: {}", inputPath); + + final String softwareInputPath = parser.get("softwareInputPath"); + log.info("softwareInputPath: {}", softwareInputPath); + + final String outputPath = parser.get("actionsetsPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + 
runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + JavaPairRDD softwareRDD = prepareActionsets(spark, inputPath, softwareInputPath); + softwareRDD + .saveAsHadoopFile( + outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); +// , GzipCodec.class); + }); + } + + private static Dataset loadSWHData(SparkSession spark, String inputPath) { + + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + // read from file and transform to tuples + // Note: snapshot id is the SWH id for us + JavaRDD swhRDD = sc + .sequenceFile(inputPath, Text.class, Text.class) + .map(t -> t._2().toString()) + .map(t -> OBJECT_MAPPER.readValue(t, LastVisitData.class)) + .filter(t -> t.getOrigin() != null && t.getSnapshot() != null) // response from SWH API is empty if repo URL + // was not found + .map(item -> RowFactory.create(item.getOrigin(), item.getSnapshot())); + + // convert RDD to 2-column DF + List fields = Arrays + .asList( + DataTypes.createStructField("repoUrl", DataTypes.StringType, true), + DataTypes.createStructField("swhId", DataTypes.StringType, true)); + StructType schema = DataTypes.createStructType(fields); + + return spark.createDataFrame(swhRDD, schema); + } + + private static Dataset loadGraphSoftwareData(SparkSession spark, String softwareInputPath) { + return spark + .read() + .textFile(softwareInputPath) + .map( + (MapFunction) t -> OBJECT_MAPPER.readValue(t, Software.class), + Encoders.bean(Software.class)) + .filter(t -> t.getCodeRepositoryUrl() != null) + .select(col("id"), col("codeRepositoryUrl.value").as("repoUrl")); + } + + private static JavaPairRDD prepareActionsets(SparkSession spark, String inputPath, + String softwareInputPath) { + + Dataset swhDF = loadSWHData(spark, inputPath); +// swhDF.show(false); + + Dataset graphSoftwareDF = loadGraphSoftwareData(spark, softwareInputPath); +// graphSoftwareDF.show(5); + + Dataset joinedDF = graphSoftwareDF.join(swhDF, "repoUrl").select("id", "swhid"); +// 
joinedDF.show(false); + + return joinedDF.map((MapFunction) row -> { + + Software s = new Software(); + + // set openaire id + s.setId(row.getString(row.fieldIndex("id"))); + + // set swh id + Qualifier qualifier = OafMapperUtils + .qualifier( + SWHConstants.SWHID, + SWHConstants.SWHID_CLASSNAME, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES); + + DataInfo dataInfo = OafMapperUtils + .dataInfo( + false, + null, + false, + false, + ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, + ""); + + s + .setPid( + Arrays + .asList( + OafMapperUtils + .structuredProperty( + row.getString(row.fieldIndex("swhid")), + qualifier, + dataInfo))); + return s; + }, Encoders.bean(Software.class)) + .toJavaRDD() + .map(p -> new AtomicAction(Software.class, p)) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))); + } +} diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java index eaff5ce02..5e705716c 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/models/LastVisitData.java @@ -1,15 +1,15 @@ package eu.dnetlib.dhp.swh.models; -import java.util.Date; +import java.io.Serializable; -import com.cloudera.com.fasterxml.jackson.annotation.JsonFormat; import com.cloudera.com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties(ignoreUnknown = true) -public class LastVisitData { +public class LastVisitData implements Serializable { + private String origin; private String type; private String date; @@ -49,4 +49,23 @@ public class LastVisitData { public void setStatus(String status) { this.status = status; } + + public String getOrigin() { + return origin; + } + + public void setOrigin(String origin) { + 
this.origin = origin; + } + + @Override + public String toString() { + return "LastVisitData{" + + "origin='" + origin + '\'' + + ", type='" + type + '\'' + + ", date='" + date + '\'' + + ", snapshotId='" + snapshotId + '\'' + + ", status='" + status + '\'' + + '}'; + } } diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java index 46d512dcb..9c145fc19 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java @@ -1,50 +1,21 @@ package eu.dnetlib.dhp.swh.utils; -import java.io.IOException; -import java.util.Arrays; import java.util.HashMap; -import java.util.List; import java.util.Map; -import org.apache.commons.lang3.math.NumberUtils; -import org.apache.http.Header; import org.apache.http.HttpHeaders; -import org.apache.http.HttpStatus; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.util.EntityUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.common.Constants; import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.common.collection.HttpConnector2; public class SWHConnection { - private static final Logger log = LoggerFactory.getLogger(SWHConnection.class); - - CloseableHttpClient httpClient; - - HttpClientParams clientParams; - HttpConnector2 conn; public SWHConnection(HttpClientParams clientParams) { -// // force http client to NOT transform double quotes (//) to single quote (/) -// RequestConfig requestConfig = RequestConfig.custom().setNormalizeUri(false).build(); -// -// // Create an HttpClient instance -// httpClient = HttpClientBuilder -// .create() -// 
.setDefaultRequestConfig(requestConfig) -// .build(); -// -// this.clientParams = clientParams; // set custom headers Map headers = new HashMap() { { @@ -64,75 +35,4 @@ public class SWHConnection { return conn.getInputSource(url); } - public String getLib(String url) throws IOException, CollectorException { - - // delay between requests - if (this.clientParams.getRequestDelay() > 0) { - log.info("Request delay: {}", this.clientParams.getRequestDelay()); - this.backOff(this.clientParams.getRequestDelay()); - } - - // Create an HttpGet request with the URL - HttpGet httpGet = new HttpGet(url); - httpGet.setHeader("Accept", "application/json"); - httpGet.setHeader("Authorization", String.format("Bearer %s", SWHConstants.ACCESS_TOKEN)); - - // Execute the request and get the response - try (CloseableHttpResponse response = httpClient.execute(httpGet)) { - - System.out.println(url); - - int responseCode = response.getStatusLine().getStatusCode(); - if (responseCode != HttpStatus.SC_OK) { - - } - - System.out.println(responseCode); - - List
httpHeaders = Arrays.asList(response.getAllHeaders()); - for (Header header : httpHeaders) { - System.out.println(header.getName() + ":\t" + header.getValue()); - } - - String rateRemaining = this.getRateRemaining(response); - - // back off when rate remaining limit is approaching - if (rateRemaining != null && (Integer.parseInt(rateRemaining) < 2)) { - int retryAfter = this.getRetryAfter(response); - - log.info("Rate Limit: {} - Backing off: {}", rateRemaining, retryAfter); - this.backOff(retryAfter); - } - - return EntityUtils.toString(response.getEntity()); - } - } - - private String getRateRemaining(CloseableHttpResponse response) { - Header header = response.getFirstHeader(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING); - if (header != null) { - return header.getValue(); - } - return null; - } - - private int getRetryAfter(CloseableHttpResponse response) { - Header header = response.getFirstHeader(HttpHeaders.RETRY_AFTER); - if (header != null) { - String retryAfter = header.getValue(); - if (NumberUtils.isCreatable(retryAfter)) { - return Integer.parseInt(retryAfter) + 10; - } - } - return 1000; - } - - private void backOff(int sleepTimeMs) throws CollectorException { - try { - Thread.sleep(sleepTimeMs); - } catch (InterruptedException e) { - throw new CollectorException(e); - } - } - } diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java index f58705188..08400f28b 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java @@ -12,4 +12,8 @@ public class SWHConstants { public static final String VISIT_STATUS_NOT_FOUND = "not_found"; + public static final String SWHID = "swhid"; + + public static final String SWHID_CLASSNAME = "Software Heritage Identifier"; + } diff --git 
a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_prepare_swh_actionsets.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_prepare_swh_actionsets.json new file mode 100644 index 000000000..07ab0b1f4 --- /dev/null +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_prepare_swh_actionsets.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "lv", + "paramLongName": "lastVisitsPath", + "paramDescription": "the URL where to store last visits data", + "paramRequired": true + }, + { + "paramName": "ap", + "paramLongName": "actionsetsPath", + "paramDescription": "the URL path where to store actionsets", + "paramRequired": true + }, + { + "paramName": "sip", + "paramLongName": "softwareInputPath", + "paramDescription": "the URL path of the software in the graph", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties index 4cc1c1e25..651bae337 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties @@ -7,6 +7,8 @@ sparkSqlWarehouseDir=/user/hive/warehouse softwareCodeRepositoryURLs=${workingDir}/1_code_repo_urls.csv lastVisitsPath=${workingDir}/2_last_visits.seq archiveRequestsPath=${workingDir}/3_archive_requests.seq +actionsetsPath=${workingDir}/4_actionsets +graphPath=/tmp/prod_provision/graph/18_graph_blacklisted maxNumberOfRetry=2 retryDelay=1 diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index b89165fa2..7aa667a4a 100644 --- 
a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -57,6 +57,7 @@ ${wf:conf('startFrom') eq 'collect-software-repository-urls'} + ${wf:conf('startFrom') eq 'create-swh-actionsets'} @@ -120,6 +121,32 @@ --requestMethodPOST + + + + + + + yarn + cluster + Create actionsets for SWH data + eu.dnetlib.dhp.swh.PrepareSWHActionsets + dhp-swh-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + + --lastVisitsPath${lastVisitsPath} + --actionsetsPath${actionsetsPath} + --softwareInputPath${graphPath}/software + diff --git a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/PrepareSWHActionsetsTest.java b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/PrepareSWHActionsetsTest.java new file mode 100644 index 000000000..ffcb7aaee --- /dev/null +++ b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/PrepareSWHActionsetsTest.java @@ -0,0 +1,97 @@ + +package eu.dnetlib.dhp.swh; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; 
+import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; + +public class PrepareSWHActionsetsTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory + .getLogger(PrepareSWHActionsetsTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(PrepareSWHActionsetsTest.class.getSimpleName()); + + log.info("Using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareSWHActionsetsTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareSWHActionsetsTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testRun() throws Exception { + + String lastVisitsPath = getClass() + .getResource("/eu/dnetlib/dhp/swh/last_visits_data.seq") + .getPath(); + + String outputPath = workingDir.toString() + "/actionSet"; + + String softwareInputPath = getClass() + 
.getResource("/eu/dnetlib/dhp/swh/software.json.gz") + .getPath(); + + PrepareSWHActionsets + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-lastVisitsPath", lastVisitsPath, + "-softwareInputPath", softwareInputPath, + "-actionsetsPath", outputPath + }); + + } +} diff --git a/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/last_visits_data.seq b/dhp-workflows/dhp-swh/src/test/resources/eu/dnetlib/dhp/swh/last_visits_data.seq new file mode 100644 index 0000000000000000000000000000000000000000..683fc0e693c3174f4d863d95a31361c6651565ca GIT binary patch literal 253188 zcma&O2RPOL|2}RMAtNI)lO*Xl_DIOimYEgDad0?;bB;|aNw#EmciL-=zuS)=+~DJC2n!*AmW zWllMPko;@AY`1K$89oZr=MH(qB8b1Zdp_YFPS+d1`2HfYs!%`O#6sb8k2j3bqC00z z&)b}?eq6qXl-`067_G16cISrV`m?IjbtzXM)x(A^-p(~yCP6KgKfjy#v6hfcw4N0m zwfx5QS+;#a_*q2j8q^?L8EoRLL>=8_I@7DEOm|cbT{wb7VABk8zysSdbL=}T+Zg2 z-{a2Vu%^Y2?^cc_TAZD-Ww1H<*1xZ(sJcv0{s&5qeE( z|9<_+fpc8}%+KI?Ki%z*I*WQm%wDszaX!6^{p1kt%LlGsQm=03e<}WT=pJzXstfJn zFWStd&=3H~4e(M|I|3AC1PrZf2DJm9pq@w_j@EdUF zRuKlZPbOsrAJ0^Wa=EZxl)oluZ~o@%*lC$ZLJ>wEQdyhkBC{>xt*CJPwlBdtTg)9d5K?NAWx=TDdN=6S z^II(6?>R|MNG__UIB&T=6|=KTkM0(o^4O9Q-n(wZ>6d%MNv9{pQ2eoAHCo5gqYM*P zQ=n@!{5I>lb=~Ez_Oi810VA1W1D`vw@;0S5k4I(0&7_!j0_EJe{RK9N$lv$r^)F5D zxch*I4zm;dI1&d(OTqp+jR2hC zJTJ=@HgZzG$hAC}*d6|+x7>y%HwycWwx7AbGwl?!zKxtdI8?A{DD0$^t}!u}3_%T$ zh3jS2_jN$8RWVXF%7k7M?#HRd<%EUl#TJOWtA9{sM|Ufws65wyb8~7uy{k4${M%O( zmppg7^k-|LQ*`@RXSPBuQ~i8iIFGzDG{RwDej*^EYsKL2a%{_PTj^ zSfj}Jq@2QipQ0xw>?kLMGHIQe}ke&JTb&N)}xd92I)_? 
z-dF)#h$~Btxv1z%G-g8m-5sf@tfai?yBO}Vs*Ga zRr^9F=os??=i(+LM>)-JGaVHXv~peM1-pB_QhoDc=EP)H`DDfWl?pXK^|FU&w3JJ; z{3eTr2wfT$gKV2T!|N-ZwhKN}PK#+T3OBs_KD)ZUXKS@jW{+@7irK=gb}``-%`eiQ zML4!dCQD}&3M*jDGLCz>pB2Zl zdFyN8r^&1}c7BBA=*?VfBdp-I0@fc1zta0ag!Wn%bac&Z9zpd>oubvvHBWHv-4F+b zc;f7d0>La>Qb$T~f2z!Q@_DPq4-Z=u89#rc@c2nVK^l7h5d1lUF#y0wxVer%%*n+Q z2_<+UF)n{#YCTJ=FU6qvh6l3r4)_O&QZTpQ6p3U}t5%k(cMX})GB;V@)ZSWb3<%h( zDowmcBW~2q1TiPAx^{crg-L%l%93QF#m}BaE|~M7pztLrr_vy&;tz83wGM|53`6vu z=ksqCB5$2yvAZjVw7^$$jUs*K&7tG%o58T6c2n28c4b+s5z*@naS(0`&YBOlKJc}( z{ij2PyIvF}#<`Yegxzsi@yToWHZv+AuiM}8-L=9!k`H~w9BXuT9mVySy#Cnim)Ffj zFPhEo34A)NJc#}>TGE7hD$O4_1uzf;um81wJ0!u*9qxi42m?w6HTXk~bdeY}C;^>70v_qhBw+sZDD*`5#=tY+pn%gmn%8cZKnHZ_!72~xxt%J%7;Q?cx<#H1`) zDkMtk@;1W8CQL6bq0p4`g^QJ`Yp9SNrX-XrJ%H4Hw%{>;w6S z%>Wh&@cQ3%#ADrYaD)Ru6d{a35TJiCPSD5T_C$>S3F`&o_lqBKFkKKB0v?G6lo;G& zxRWCe>+VYY6%i0vU93II&?_~~K$}?USj@i0OTd?SW=N@cciy_ai5!-QT+=)SDt}?y zFkrCOWL?t9yZ(*)RU$m8?lg~4=PX_vD~`-Uzb#j6KE)a;OcDO_-V@Z9N$RL(rXQ4w zz518oj;vgwf-B(>p(3A{>t-o$aEAUcFEf;g39HLh9WLKZ)$U~yj7f)N*}Ur6iexQ+ z(;>iRI*cyrDpZAE8LC{5rt2|~$bAR5;h3Tx3Mm~4QNyl&PT0KQs%$8D$8HnX)1Y0C zf;9+Mn7KN3zmVR}CDUW$6b!4TKmBgyS`#b%FVW*S&sXJadT#JsIPf5|9o3(EA9ocL zK30I%k$g(Y9)rLjWW}L|2nQqvp@gvq#~498ht1#|o_sK#_q|3U=5Z$4Z=U54eyzlg;ze{q?TC-%mRw=pN1W)W8C@g5DC?Yrca zl6A#fcj=!8^y;##%s*yDSl#cCBD1DrDFf5si%}#aD0B=317lnbk;n)`d1h4maF}9&O5QP zHla3;D<$=$rI>36o>u9+dh8i`6}PQSo`C1PbDEwVRK7_biqlo6z8|)O%Evm^Q_{p5Ly0pM763)~%^O5902#G`Jf{M zBnaHolnW?zrz7{bE$zLaFEeo^QJ+ixI6r|=kAtPX!heZ^!kv`do5Ef0^uj-A~OWPEx5M&QVxZ03b` z>bjk5-0In`{YEFoF8WLs`lBsr@&r)i(c@d0`=`Aox^R4VU-x{P-%lp5xBL1E8Y1#T zA>l+7KLe=$*a-&t=AO@otUsf_nAzAS=wHzeoCZ)Zf(vwk{|G3I2sorY0td5qCl(o1 zeO)Eu<>}EP7iA9mYEtTcMY`j_(acqdx+dY^eebcXp6K^E*FOn;XHduyyhJchp#1FF zw+BWMz00bKKgf+hk%=K=lh@4?M31%7sBMTiSGM3!((6ZAnijv%f0D>}9G@%_7^B^A z3dgsQw`{_1JQP>3EI0lhUnm1=xUNfQWv%-Y=-#sgiJ*fAv>r{l9;VKLg)9Bk1ihWO z9CVnvbh(JbvTBOpkDJ2&-nMkU=hG;ivh#|~UwAIpVbca{8ghz>aa{u1rl0qwcBp{M?ZHJk@%iHaP7maDxx4u-Ku5C~YP 
zftINe#2$zAAW97cFwFtrqOG-nw$T3dl6U#~`Kc^-{IRFUUYjLdK(a4O@h6$uj|+Km zM>&R4?=QF1o$NmRDE-va#_@EzC%9S>s@jGmy+c(yadZMVyV>2O;x9H0eO$#O(`OFW zw5VZr$0z+Z+ul95rRZ>oN*+z0M|Zpqi_ZO^#ht51%{%Ga%d%wt_ykgS&*NtH6kktO zhNwM<2kN^mceeq4ir~}K@G6l~cPR8$D7;Iu!s4^=cFCG|waYENcoEiXS-lB!tjp+# zAr@wJ`5!%|f+x2l2O_0>lpXUJxMt^+nxg11{^T;+uCLvEzdU99)kVLyNy`=z(+6<+ z30_>BM_>haL|~!B55xo2^=G;*a$@manH?kL?W_r6N`a}P%2FP{ow^jEnMQ9&lZJ|0O| z8_D(3U|;F$^SA>w>aRLE>1E2tB?~iu)wn;Y^c&QsqLQ|l=vX)Jt4>oDND`6fIG(MS ztS2)Xa^qD8n~|WOO+~{c)7F9XLbBwSUFCZf0oGdw#n&8}=Ef`dW&MW6DN^{Hv6Hd# z%lyieHk=0)XX@0vTz>_0&{G9WQP2BOP88l-1_0=Rmx}U#K=>!Uf;r+uL_}mIp#%p< z2n^*Y48bE%2sj*x#Xyi4JONC+NEjZ1cZJ~q7yY}6Km*?O$g++nl8oQP%K?UVMIpS1 z5G(x|e}0dHoR;SmG5`CO{3-8Sa3RzwQ0=1fRW#qHBKg&^JXH2kSJpAuPkSBYe})+&17ldHv9CQk^wavBJ{k z%*~dS#7+{%MO?vc$gSP7_eloqi>@B-kxynWR*z>kNmSQ3k)lc2zkirL<(bIYI<<3{ z@bl4^m*NjsrB{4H@w)w+>;vQ5`+mqqve0e;2oO35)qLI(J?Igcc zU8bM*vG|JM_)lK||b9)UVt`n|JYO0}aj5G$zPVYbOWVRX6u$fM5?I`Ua z5APqf{3?WdKX-5*J}0vG3SxBjm&Aacl#f(zJnt2eJW47>&bpWMy<%5;4g+R*sEV8e ztNh8?sl(g=C_q3UfeQZzn1AJ>Kw`B=V(|nVQQ7+EX(P6X|BVQLgVyjbDyoenK$Tow zRj|0<1nY0-O~zwTX4>TPIPZ>AvL2&052P-RHvKQ{LaAFvbC;ho)1Bcu(fQ*_F|e39U6>5GPUzJZ)Q@vUCRo;l(Vhw!C_M0 zIpPd=x5lj2#Q(UVC4_ju$OjcXr0cp`w0ECZ_sJ+t(t7i9Zc8qjV#q^1xBdniUECTk zu|q5Mt}JOTs(1(SnlrL3@wmYl;Q3QuE86xPx-M<>8edTSBB!2w@yeS=T09y}v(N7F z(0#U~=M#NDcG&ni>b|d8VAXB%=l&s-;LuS4Wcbeg2iw0PV~=nEHO|Qi4{0wW57!-)aH0sO!VEaBmW7 z-|Evd)P0ukXOOm>X^J=ETDptaI&N%-)0prI-^rsq4$7S{9gXOF$YH=RD0h++Za;vvx$ zvDjdAvWNV~9UaWiT7(||@~FiJ6->An?YbiosJQlY zCm*(S^PFfN`_8@jOMKPL9EDPbJoift%^?#FkNt~yWGhq6vKR&}Rz6qh4MF;DsiW^RmiAt({xJ1uqd&r7G->*an}Ir3NDbSyZnJLM+Hq=x?CsjZgdfe-BJY~V zO;_)Gd7w4<>g_z57p^Z`r<77!ojdV;ZdMxoC33cK_mJlgDIL`px4t(3p!jFk{M)^P zJK>Oc0@l?@7mD>j;5?8BPY4{;ECe2Ehr-%H(J&yq=Sl!s^(^VZ&1Xi`$(y7{`7m(tF{xcgjn%XQ(`k$Gv+eSZCh&cpAN z-)13sWJgn3FE9s!rp-)p$pQr%qIlQ+9gN}NssxteMynh ziJZmk9){h%{yh&`yehD|eqRt=R-*HBWc6}(!C(n?SSK`8T~k$4<^OPqF-(aiR_XfG zD-EXh5bGG3QUSG>D=adk!LM1)x>H_ml)(N9*pTKCIBm4t+IDhF6)#2Nf3{JgJc5t& 
zR@QahZN96Yd3JX5?b@TsMfrqA&Q&XG3cuF+5@)QhoKh~PJZ!O1QTq+D^kA#SUfBXy zy&ey%nx|S?gL}8K=fth|Yn2ah7^js^NOFyyPD6hiv8Ysc8?6XRuiP6n8;KM74T@}9 z9yVx+pVo{$F`r(;Y-wyj=)#gU_5J!h_zLL~30Z^8cWX``4cGF6LledG2g7V*draNr z4;=s~X8&f)?n(Z>qin)0Vl!!P#4&hE>EXufq-;3|^$&S5IK7hWlYAU^u7pvZROilfk z-2_)W(&8J{l9PPVV@$NQ#=EsVYAWqq#0yOq&A1gyt>6a@@%4JlHcP|M1)aI;;esU8 z_*%zCghITCJabFGxo6h7A9tZ_Zw+0;`B;D%@hXrOSYvH8w&x!NQ}_A)dh}&x zfykp7RJjjvUL}HZJ9%TX!92Z*)jRdvHo0 z#whL|HgGrSUjl=4pqMkGc?>CAz0PVyDU80T51J`r9h}Wz&a$!$^rvLy_3JWtc6Vz# zo0Ub{^5*HXo9;C{4;D`ioOQTB)seheXt}f(LtmC5I4>UR%zc?}^qocas&&{%NoFDb zo%xbfe(m!q%!E$I5^gVHf1%j%xfRf#(!y*)b|rhI&oQshM_Fag&Ig{;=dAC4Yx0n< zfA0AMAKp|?x{}V$0R>%=3n7tF$!2uL&ILws* zMjj2sAA9+K!^)Um?)e51R@fb-4JjXwMx>lH-b;S99sbMc=O< z27KV5it-_HM`N$>)_PHv^)Qz^Sh?Y7hLN+g#-UJRkG zZH>;DaQb;EGbavdU+rRC@DAT==x_ny_gg>s^h6)%2lLYD<&&7OTKS!dl^+#3ZezPO zxpK}z&MeQ{;vW*SzWZ12E}&(*+Y%|Ci6ppug}7RM+&+7EAo7dlMUW zo5)GPib^vJ(&FPaJzl|HKX@vS8I{-;>MNpD}Q??jSPEhUs!3X5wMKQPSfmi)vOk5?p#vz&AZYho-HR{$m z>CG0yUJ)-(F)<~1MIb9}+TFS+E#^eB#nW>c_O;wICJZGzwZtlEch)f)Svd&wl^4kz zD@?DkT|3ZnZCI&J4hfb26~`HBs0$lZ;$mVm5Bk*76C1&?W{SRflqe--nO8K>>o(?Q zFSn{~9X9^p{ZRDgZ12+w@B765BnMDAGU=jeWCBsaVO=50NGt;52*hp#jwnE>U@<@y zARyh*P&gn@uApIMMia{qNV*h#*J8q^Rz>)u^i%JOPh>>G1w$adM)z}nksZF_`9K}* zbIgqs>EJQlFyo(^6xCZ3gp^_o%8;#$d{ZvutGE^1zBI?+e{rWbg?}vT@pH*6`7#y{Bd-OIFBUhFEPZd@%2RAVPmFt1qEP zda8>(n?pJL=dxaw&h2Mr;SSx#_eWwCEFzn~h6XHr54hb@zE~2JeJ)1p^H5#Q=C|2DJb@W|qde zxbwFqbRU|v8BoR1O$klbhtMK9$E6DlhHYkxzC$@l4KE&YqF2S&qKmD_&}LqC-oh z8E3kfiawK&9$R<$^gWZh8b=+mc-2!ZdZKQVIU9~wUy52h^?N8)GvFhr1L^eWo5;er z!>`4D>ECqjcs}wi{oN~?hA8~4+|(vggG{o}w_E)C_TXnY+5Gi8`=>^*x0|lMA_*2k>+T5QF7 z;8PFMjtc$e#>6K#-$V*uv8#DIM%QsZCnmw~^{RYC-gWg`CWYcFdr~bcro3Qp(mAb%(5B#}XLypcdhB30yke}Vo5f0f1W#1p2R z!s8bO+9`xMWDrNwBV8%9J06HfC=}9J3~GnO;IQ!D$Y4N91E@zJrP2JI9v}HRkBUJB z!9nxD*d$hHn?W8)Olf3w3G_&B_&?RRVLhGx^yeF9ysEDu$t!_`sB$`?d|9Mp!s|vV znI8~}o=YA(9ZW{)b`%|_4P1HY>SQIRXn0I!a`Z-UYd+)hz>$LLoXbe8B=#%6C2qXcXB`pe zrM?Qi52_n%1+NB}dMoJMDBjCU0M}CGvxw!r+OLOLVyXZMQU#S%q2M5rQUFknjI8Jz 
zKva!%p!xBbM@}KsP2TLIg(6$rPi!{VKMO{1b&U;Z=<85DDY^An_n5}5g|Wp< zzlE2@rg4`uP!8v;tJrJC9|C=<-n-6sZ zNyAceV$#ri>!(L;F~iS!v#cb(i5QB+tJU>IIyakS&MzrjHLsD!cL-O-NQbbjtvwdV z3c5IU$6nlKD>fw{hjwUT&iY}FZ^Y<6LFxVW0l!$ZKS^|deT5phYMy3FGfJ06q3^}6cY3eJ% z2}lnFR2PAA!QyPxbkuFs-Fi5siL4+WqVTkchxk8uHtPVue_6IXHQ4pgmckatRDYZ#; zp!;~7G05XOoA)+Xqfv1nZDdYmHNAZQ{I!-S`a&PF?f4I5b3aBD1rAub+dgp=lZ6`s zuucN7j;vH5ZHIBi5mibcM-rS+NW0&c6LTL#EQa&!Ekl`3km4dg3yGgU&&UwVmg_8G zw#@4@NKqc=ARAY=7xVPz%+kdfNS z;uxIUFio8!Q%?5q)tUtV7a8oyqy;8P=~3$k2{envY%NwmSey>E39FRv^}N6a?d8eR zX&#hQ|ERNl|FNJ-Dv2~bF|YcT>w=1_*{{QyU)Q~i*~iv7eo==2nn=nFFgbGS$HUO> z2oxNP0zwhg)!Y8}?IK>T|H=O?Y#dAis?BY6Wg5aSKpj+Oi!Dt{_5Q%6+K zIA{jwZ^Ir$sJe)e1ZKPWLeYu5AL^<*9w2vV@<-`dH{)4QyB$p8afZiNqph-zvu?Ic zK7J`|#o;Zb6<;T-&3j8$I@R&gsloV7bffOjb5^S%woN`$b4*HadHIks0o|46e7>_G zXH|XW)^Pf8?ohC_^=KAQpwO_Q^C1LIV4(4OY1v-o~hlCUE^!UOXGSn$Jv*Eh`?`4*L&C94z|-9)m{9=DU_ng zdnMrtwuL+SiyJ}a9LmN+zpu8PGQK1*>Ph0MGcFL=+X%BiI*n(lR_GnFgnWC}|s!*fR+-f#c9<9(8=e)5cOs=RrycOutVN8lZh2fslXe9Ua} z&;}%R3VNG*XR19O5c9o~;B{oa3*09UG!pvPCmb-&ff)OL>O7HP{jWv*O?`57$9e-_ zMHZ@TUCRn$t0Pv&RCId7N77Zuec{# zE@waMVfFcO7@**n_S+X`1HNzv3DFle@t3(b*`gOV^yOgFqEIe)Oh27L;OSKwBUZb! 
z6=&iumb7Iv4s(pC+da{e%0&+Ir7Lyo*v&x}Cf!n&@@_fl8okA;%D>D)mhnfBS4=5& zR3y}3B)Rf2zAMJlyb(!r{`Ef=9-4DPjOeblq(Wa~qjn*^($*I#cLXdmMK&qb=5vUQI4h#Ie@p<<{Bb%evv5c@ym z>VN00V@yxS8C!d;Saofn&7DhI^Dj5V>Q2{h&ney}S^BVXHzkTGOz^v_Ex4cBQX~rf zRdf*%;#a4!Pq~{cQ&+9a{z(5rpF-E8t)8DxK=-&qwqF^jmuF2$XyF==Sa3I4FER}# zcdg$}%hFbujcLF6t60f}SFShC)iZ76bcqSITzh6&Jv$+adOvNM2Dna{MfpN7kNXI4 zH?NoxrIqAD36XJ8PV2`XpWG6y9>~15Y|ZqT(u$1eI+?iv*U9<&VG7X?5h>mhV6YCn z4vxwyc%A1v0uwyKj+h6Ag5&_g5hOUhQX`_YF=V1A9}K4$PkXQl3?T(&Dfc_?q}=3+ zx%l-C!yL6dUOb&9-r5U(7}ck!-)DARtxnaI!5HuU@Rme(?d_+LH@!XMzei9Cuki^H zTw^X#_jhleiSReXSzLB z)*{;3v$K(Lmi6gX##VDBiZJ2Fk_FzHlQE&vxhE4IY-&~9D_@B-R6#yqw0LjSz_p8z z-FY=N$cdx7k;{_|V~(&59YqUdx?Fi4*P9dmsbRt5N}*cTmm01^Tx(rE9a`8nqf=I+#05s)Pj^iJe#QQ12T07Tmyd3F~Yts0yI zP~ip#;Yb(T0R~6dVX-d%$>2ia;`Xjs8w?Kbfc`&iY>eUAcg`sdxHv@~DR6;Ih%V63 z>hNzD$ZxRjDOTZWL*Y4~=>L@|34fB^6D!p{+aE*q0b+PA+=I-L-bDr?T5ZleH}}}_ zd0s99x!6LumgzU89ZGM%IITPjo|Mw~QuMo5u@;>ut#FVmS*_Ue9;GQnQp;+vDa$2h zZE$=$7a?w~9Vb?>w%^P+l5u5}P{3XqrZ`d$#rD`;cypB`h}hC(N8xsdmB6|HomyWnD`d&az zu-C+5jBzkmjo&e4eUIPjJ|NNmjnk9H*tQ-|FD&h7V3)Jylci{uL9;AS%WKk`r&k!p ze(U%&BfOygz}Sub9p^sa@zx<>;k(Ooi+~j6lGGiujwW4@w_M%41XGF@Y21 z1vj<%RkK+ePt`^GmZ>+!g~Lu+Y@&+j%;b3PFNJe4hfbL zp?WIDBH}W#;u2zFe^}#xVQap_l7il`M%d6wv7MK@DWYc#P8R*3Q9SWv)%-C<8vLxy zbzsy2SxEvGuP749_Ilo_Sk2CTMCm>Z3#x2=)K!?Eb;7g+|MqU;X%>!0sJq8m?b=`K zm1*#nuerY}LO{KT8gYfPP@M}IkOa%ho-O&Ou06mO>6Uq|G44(-tyNAj5N`@4}+w8NswA^?-fYd1>$P~U}phfM@LeT9#{gfsZjtkxc7gy z4$xQ(5+FycR@y|N-~n+V16_acPPP;ZSBp16XoIaXUh^pTC^Yu4be8p{%mKq5g42I#?m*9;dMEd98={Qb~2C zPU*aj`Nq$q=VzdtYhmm!u%cckmc`a(pxh+CFp0E@ubn=dG49ACR_eq43>&mYkaNE9 zy>0JiplvFLaLPz{8fsqF$ct*X&Ppn5`0PB{wRhi44}^UtKW-LNr$3p3<<1Kn+;MVX z;<_gp7ZtDU8au=jsjyG!Ogk9N)kk^wXuEM=u-c#8Tk+8^fD>_v?#S+LYKliAQ3!7@ zFxC=a8v4I4+<#`-%pM)9$5jm^LTwc8VHA!rH?@+1@|l}!HhUtK#q7>I#3P5TOmFc+ z|7riUcg!lRJDoYV?DgoL(F{VdK~L4k)(6=r6mB;$(a0<0#a+42tXhrBWM}M6R4*94 zSy*yg(8Uq;q~%#jT)hV2VUnhDX-3_oaI0&FZ?ci%f_Sd81(s*bpvPj=Quj$gbJm22 
zG{ooqShA(jo=%5b!$H?K92@>AaA@Vy%o^#6v^fi}Ui_iJS#{k=XoInQNcH)nGJW&QccHBap2mUR8ghs2l-k@y_xCpiN(-x&>a28uC?fD$JF$Mzo* zzLK%2lBldO+Ww!~$~o|U;$la11r0+iSRw{Ioj{z2!QP9F2NLfNLm}~h>CJHl+QjZ) zQq)orNhyLu3HSRC`%6?btGB$l$7=$5L;Z~nQ(|xE*+ee-D?V)yzN{);8-c7~Sl1-) zj*P$h4u12)g5-Q`xb!ihnD!3@55X6nR2@MPCi%7+Iy5&*s5+R>bY#l7x-qm%ceoEZ zx#!oK68yJ75Uehmk$1uPHhSmR1L^lejG=U^ERix5rminySv+FiY;#pSE-Pg@z} zLt%1ryJY4->phvN%1yGfLkTY5CO^f#Xj~0)vwA=NPVlBryVGeVt`Ddc2FIqUyLWiF zuQnX(UT__qcUx7waNtP_#O~FvRQtaZLl=5o7l@S`srKlHmN=8!ab71_V%dY zK5;QCzPMh5mkqffQ8n%8-ckSlY zjOg6cS*KSrO-!lteyF<)Rs4LxVEXV}=drQ{oE&D$Z;JKi7IPRq)5RaBwom-L^X40) z+zSdxLV=V|@Zgzh@kQI3^z_|t$xDsCoR6L{lQ{GxtgtS-Yj4pqpO+3|l9G+wrl1ng zx!VM1dc0IjF}=yd)wo6bp6@G1_&)wH_Mq{5s&xa2=9_4uB1LSOM^X)A;^d|RvRee` z|Ng3^Nkr-IlEUG!HS2dKyC9S!VjI%-*K%;mb|g?0N_0H8GQ6?V(m)rMKAp~U5M(aJ z*waCF^2wV86J2bz6x$!xGGw1dhL>be(Z2uT)MHZ zpv%|p>V>N*jJy!Rjh!xuf*<$r>?*ym{Ves6Ed2YjmNXy1{);(Y5X*uHlumZpN1X2S zDDV0N7Y8>FVxZ6$ND7y++Nn}k6T|lNgp8IyalPQM-o76wq{2z!NY8TcefBGT)9eYU zV~2`+OZPQ8?oKt5GIE94{|m|Z*Elo*i}A)gfhBbm))NZG85_L)@6fI;rsYonb28d3 zm51@^3t~LAB9lR5=h%G)vt>~k0d+KgJmF8^ZU5tK76qA)r=JHQKuAX zca&|g<3;T-nyoOJ2hBB13j_LoZ?47PgGn%Sr9eGS@EeKGx*$jNN+Jb?+8kRdja6?df%8f7gcT&o&iB?qT8QPQ9Xy59(z+ zMac|Zh;F5p%FCY!94ryL{sCF=%;#7?$s?-pt3g%1i4Q(ZJas);|B=JGLiG|f!a4Ri z3jI>)_`(|lH3Md~;nPVSCK+C5ihKs#P?2q3_>8`Uc{x-0y${0dKe8&tGn=`-)5?FF zhrl&*w$j=LI(2TlZ3gs3bCuL-Z+Y(V82;L!F|<-3?P7@g25=*?eaPh_>beqm@Bu~> ztScZ1?gRwbD1&1ia4tCtd(`vFnp#+I~(iJQ0+B zv8_X(uf;fs?XuCLQ2spWWZ=7^i?rMkv6+Qmb4-8l+J!(i1p8|IpA>nQTP7>t;+yb! 
z)>%#J{xNk>MJUctzB*TJs#9Is$H02@9SDQ3Pb6$~!+0z9NQAog3=cZ%*4-LTm6kj` z2i%I(V3i;FFo$VXN?`z zVY$>cm6MIh^>krsFG=q#gLN4Szws?UN~-buUKMCfI{LR#W1J<16@tDLmiG7jKaB;r zz^2dDe*EDZ&BMQs<_Wh_B!%XtJqHjHLqA6f5_o64D-wYs8ltY=9za3NqvXQHu3YVj2 z@Y^y_Opw=fYaQ6)`20!$HeA9Wpz$^%-lFmP>#f|3xbgzJOo`H*6vqw=vDl7*_&txY z#l0*u)-C4mQ8M2OkaU5fDlQgAISnh57D6Fh9_phjZ?z|LLItn1r|<~amzHTRHS6G8oP14cQfoxXmuM8 zI~DeaZO8PyERg8>wApq0*Wo^R21E;3H<(Fuh;-6txBWUFS(4mBmBnVrF8YJS%YbbF zymY~hYwcL)B@@ktGg|iv8Fg|zt3E>Cf%4S7WSCGn>3oZYhAGE)Ap^9 z?oG$us{+GB(Eo^D@oQ7Cb5Kk4imx7!+`QauefLlfOpH!$fJfnj!cze-|34R8@Nf_l z$Ka6;#DMreIV(}`Ig(z09V`MKje@GVxg$Lw>MA-AAjF#zW3B(N1DmeZMGTt6VsbHg zylikPvU1AwOW+X-G2Eqp9-+X~lX7_Ir{HHjwwzxgkabf-!uz=GWp{iUXFKUt-o59+ zQ~Rid@5tJy%D&)H`LHp&dj2fcHum#e%dugXPG9Y93ws%>VMt_}C|C$>$xAOSl=@7{^My<0B_npS> z*$bVoW~LOTQ|3`~yf2aF)P5^G&~Nd*>#gU?*L`y;F*6mu3h+xVKOGYnt>D?2k8>+q zpJ_;j)1lkb47)NobJZqcpTW|?r)jhHVb{|8?m#}Smz<%1`c89S7U^@T8mmy%C`w#+ z#-H7;h!`i&`kfr_;i%*H8vA*e&cpa?g=TGs3cngeR1JwF^+;FQm6#L%{j@3K!)siz zz?!xJC+kmx{2MH5Y{5WL)fJ&Q-~JaY8lTQ+|8bRjK-jepJWeU@HK*0hB!?##S!id; z_SWAedp(#hx#>9SMVECglx7jm+noh{0UuJjCxK9&-aGj?`OkbGW%!L z+C-SY9%mdO0lLn}frl){+j}{1{JU!jIz+r4UNOCRZ5hS!id=^jY}0;eqiiz2_utrf z6K3Yx&&W!E4$;WDB6~z{)CCVSz=HLBVBw&!P$L2s?qs719wFg~vrz%t={8C@m>qE! 
z8&EIeQ2sl#nCz}Ic(cJ|p_A(Kz=d&{c(8;Z4VDm~UtoSLAdaLlD>K-;R_EQgFGB;K za1bV9XMgJjX>nwGL64kbE7OBso_K|R32!flnTQ^~p6LgR%S+|5$57VHNix;THxth> z$%|!*@W;K^tT)t^$Hj@?7QGefTZrml&J}G@I0Q=?R{TVvaxlO9nTS>tHxc^tyLCHo8>I8Rp zwfT=I{m@rIK}cXUbaNqSHI z^n(Z9(%nIo&QkbjU@v@xd~gz5(rD^YrUN;nb$XDPJM}%_BM;SXC{dp+YzS`wr;;cN z9qCkqM@0}E!KT_@pFx5V?A89?I;O(ZDqB<36fPPAzs#0h0P5K8GDQ2GI(Xnz1RxxX zjYZiT;@Ywgszh2Zq?fbZVL(n;$z||e{;tFuKbdNDBGJ56uSC^OeVcYGzPN>Jqb)Kl zO{NZq&XC~3mdD3zJOxge*~@6+Wd-h_%dItwEaMH4DLHAG%;1DTLoK2AUDKhb288nJ zSzm1GU4dRwd%F6~24dcNs$+yKK4~j+PuzQQ)6WsUCGwi7fnDFJ#@@4G!lwRw}dn(H7ZD_bR!^0NDC+>AtGS_D!=^< z=e+fNKluA&&f%=HV6is)+0VW2`?_A&j_Vg6iwf_+Y5snN95(yAtwYrHmV)T_K0j|I zG+%k!dVBY_(6$!}6K;dtyw8HU-sQ`hFcF`8<%8ZpXM%mz1EUB_z45B}@eHQ{a`Mxh zA-hfs8VVEd$-2Zl`E2N>=L#{=W3iNYtOGfqC;h|ak_~u zqBo6UPr>p)5~O1>E!MyVV{K!Ja0fj_I1pch2DO!k3@FY(*9)NvsX+daBH@UDA9HN! z0V+EecLz|fARVmGa34E6pmPQP@t^PhI~dH#0kp*}0g%7}?(X9Z8f^|(v%T?ATT}nX zjkGNmcmwILp@}l`ND*KJ*V6MO`gs2~aOLjbY*H{t6LfzKsZTm7Fr|F|bG4;Tb&)rO z21T~o5I=zTi7%eJ+4)sjwXfQeC7%1~qBg^K7tJ4fXNr9L^=7`I#zkL)KMqkMp+ld| zIJ$cw>w?J+eIElRXsd`X_dGn>Gt*T~P>ebMOPAY-QMcsg=-uF)zPdwh${S+hZQ79j zq`*A32F9>y!Z#_NVVarykCJ_u5BssHuO6aol z3kdS3iUSr++|3@VDjD+vA23hNUXat*vLA>*>Ni)u>Wt^m(h8|mzTGwD;IPSuO;e@2 zQdDd1R?lv$ybz#=Avo`NfN1cg;o@~XRJ~td=bHywOdc2e@6?jJOs@? 
zffXmc8xNUw<=V_|-!r|68xa-=9UzdO^;mfU3hnEF1f67CAc;alGt<~V_GDS6(Ako} zL&uDLHa1UdjR=)dSShYZB=@y7rq4e4;_iUXwvQ!jabQxwWb1^>y&sr$PTtav8ot-3 zsd~DD^*++OUBuL7TPm{ZT<)={SXfYhd13PMEH|^`tjB@b6-c?Mi5+Uy|2x#WiYzrE zEqbTm1fYh-Ln?)SWyxcp-IP1{Q%Iy+cF3mj+e1$_o{&=FYnbAiHRJcC`Fk>WBa7{m zZ^Vs!JFOd(?+vC(uoKjtf*>m;^Y4gT+dm(QLVi|+MU6(acnBIS2jrPM_Ew9sbP%v} z2b}>RHMB(9D9HZy_y%L`r%^MQ=*B>N4vSc%-I`(Vqa%(JX{+)P!7l*|$_g#1NYNp7 zB~#da1q2t&Zq0R5uUVpa3m)9VsOHkGEbS@l&QrKsBhpknlAhf?L+M;Pd3lekO2+0* z^(@0;^+V7xDdS6pmArV%IF6`Y?%{p$sE>|0b6nCx+dG1E<>9wFr@FEApR+ysOnEGA z1Jklw-EJG)csHoSQ8X5yt|c9A{ltfvDzadW7gENyGLr&&5U_3{tEz1H;8 zxv=-PKMtpVDb*U#0u_q0NjMCw3nZR9*5c!ga7Vz^Wpre~!vn11&_4OYlN*)!Xl(Y& ziJ2BgH3p8d|2dyYYPqpb&i_ogz(ODRDEK_f4n%G8F^SD5;Yi^;PybcJuY4?@^~m*9 z&MU9@Q2?WbR}LcuQ++k zv#9#^^kde-yeWqnWICfB<~!=emn9$Z40;{LYi%aQoi**ipCWlS!fV&g05iD5eO&1y<-aGFd7D6f%{;+k9{ktK`U`_riefF>i7z-C9!r2$@B?^!h zklG0>3v_`vl0M_kGrbb8i)AOJX0_y@u2;!5$qrpnhAt2SrocAuvw7gZS680$W6=)C zQjN+XS?U+V*e&8k|F9b^dwA8e`xXL)6U!AGn9zvK2dcPi$!^^TnY!ql(VX$3=MNoi zrW^j53JWJ2*N3>g7UZ4m+z#2;yZq*kZn-$gZ4NDuv8OT~58nzixyQp{(I`wkv=1U+MtX8CFC;yE%@8AeXl-hw2lI-mGx?uQ;ppKm&V z6aB{baPYRpJ@dEo)zQWSqRA-h>L{vV?WVugO8sJ9N>=7L%&ifh;?|=W4E~RI8zOu7 zHSdX)rQAPDfSFVhvLkti7u{D~?Rr*{HTFEC$m?$!u}5Mh7?%ijTia>g?-6|uks>7H zIir@QbI;5?=aYa)7a>3ce9E4ELZ;iTuY97GNCYk4WyWVBefucuY+(#FavYg@jjPW$ zri+}kC?jTs?Ni+!qFH-|mg5b+G{=hx{O8qbMwDa<4Xc;m3yqt8I){ZpsuxX6BdWN1E-R+HScec z-~&@=0?=Jmxadj2!KakqzI$8X%{`^W#~pmY5L4Rs@xI!q+Sy{UI4}VOQ zcq#%%=H9qDjv3z6HsTaar1nwU8!H;|cw}cXOkEx`sK!C*kn1@9tmz>GCo)QZ8PC)y zW8~m=Q9+kY#OuBpI7febIJG+t#aB1jopt|WSKopl@v^z6s;yfW6lghpyYBU#BWURi zyiY&?m%=?|d5deSZXK{HU}xuj@p{%{w!kif&@acnxHN#<&>ajV!Bp51E~_S^qw~Kr z8VjfBiP|ytBk=3ERI0Ghq0E!7243fyqueg1)g!>`yqrf-5Z$y14b3~|XI4mMBg1`2 zfc5GMihVsv>Pk(CLrVCD>M6!Yj8poo^DFnQve)HzUCCSN-BWW^cYfgCfN9-$xS@Nm zph59XSRA2hL+~w0f-bAt%;nLWRfPp7=-B$Qq9TCZ zJGsG-kJPV^g6P5E6ZdVWt?IqK-XMR`7rFgy0-q&)ByVn}D5S&goHSxk0yQSPH#MwX zZ}0F4G&dIt+Yuc{g{*>=`Kwje!`2by0(Y@@5%6?&fEMMC&R45g4B>|rX<9xJ-D(*= 
zbqF*xF69i&DU(%4h~n8t(#1TiSN9%rE{9p_<0Np@tWE{t+xwedlE2Vd?WcDaB~}{>7(;(&0>}fHDD;u_1YHJaXLE1F5z_i!c?Fv_JFj{~ zau^HbUKT1#V~>+^4h9B29T!dDpG>}6qtbQ;t8w?9U652eWqm zsMlDiT?ec|V9B#=2Y@aApRo4V!eRt9z|(n+Imj#u+9WyNtg z0S>2G*o!ktRPS!aGJ9kT%lW*zLy`vJm;bAe6Lv^`kSqPBlH4K@kdNpl=+XB9CH z*5$84cvUo@d5Q_cZCvbq+|AWgLG#81dqew8l4zsG;i?(PN*<3m z!a&t^Dru>vUi6e65mU?%lIJljNH7 zHkfeG_%dLmDt$`~7|xBOC9|R-1|uZSKK5u)wLya(skIzj77ch*FeL{p zfaLc}t7jsnCpLK_1pmfY`wX$N{K|jervIlX79<*a{p+QWyS&pduzA&_I-VxDRfZtx zs|__h9o}o#Y=TmZ4w-@uGI>(&+h>exbdbde4&ACukaqM?&N>3 zx-~uTW_91y<(G9K1v^lNWJYJXRO^KazFUJj?cOiik%Z1&hs_&+nvj3bvrti8z!Iih!3e*>S`rIrA?D}+ zfEMNCW}(9cNLU#;alhscrBQ9-*QdD8H9k>uRlrKA)9lJ)^iNkFltr=gmE5VyIcLXo zo6LyPGOt7YS`Jc0H~&hbWrTC{PPaVE*CBm9+8 zZtFdo^biN_FUq~PdD#o3YST64g&d>S2Ky7}+dlSGL>#?%N?=a-J>N&N&0kTzRV-kc z%UQ?wNlQ$ihV_f7`@yg9g@NQDo9XV-N$+#9HNV8)H-856u$-a?kzn6QgU_)dBQPTb z6A4o`4K=_aD$zN)MzmoN=fbY=F#=#N}wM+q07CY2XhLkuEFx9v_ z5j7d(5%Y;iA^TKI8OaeW^)-N{uHsmy7N0)h|KsrXmD^?=@j0;-e3$r2CUL&ML`c5z z62I3k!Wu#FQYWM5I@Lq5ZWGKZx2rs+t*U&(OM5fVH?H?-*VEflq((Ih6@d5BX00#_ zO3i7ZnXfeC^hw(ahEEh`9_!a_#tKagRHaEo!s=wQa~eA^OzCBsKbmH@e|Sd&P{j!> zs6wT|*`bcS)mrX~PUeMfbjoGTh^VYJHlG$@Sv)Xb%OTC8{#18(6)-#$Gi6Mgak%=z z`oTiWg3qyv2}>(GcPqFe!bS6_x&(5PBNqnT17QdE;^*c2Z?g;QPd+wP!B2U^d6|7|7Wn>UlYIJygCh%7ak3>po#th0uIlRh@2H zq3lZ|k$#v}T3JWOVpC`3GJD+BSMDOYa z#lw6$37p|Vmw?0N(7a|?aQqI(BcV(Ua8ihYzs(iNV{QS~2sgAHI3{!$nw z`U|2P?Mq~atpn0NmPQnSfJM(C(@XL+`T@Z+A!OCl-cyf@-w574^)gFd2@qbpJ<~q< zcCke>ck^+`ThZ#=&_q=Tdg8C>!jaPwfeNFg(?(9Qm+DtgFAn>vDnOs`$ft&`c8(sw zW9qsCUA0~E45FMphR8{aiCy2(L=yd4S~xTeIvevHZ+K4fhu9wa$vZH!gQupKf%VG* z>*wKq%=)3M91#F7<_=f$1RD};IGB!uoioA%WPP~9l%1`RfR+g08Zw$1ve)GRtrI4P z&8L7t{cxBnI3e6&D(Z5|FgYEVj;9MCaYC{W1lY(r0K(w{;g&le^aCU;MR2QP&lpB| zYBjH4c5vRZ$G zd3@%^d(4&3(l`X3iLQEeC4}OTy+XQL`EoOhZESTF@FSx)w!Z9)^rNDqJF6;HBu&;` zJHB-(jL%p#wt5Dpm-6IhRpPIHt<@Ufd8c*XvL2V!@P0L!M@HOG6vAkpCjgQ9(p_ep zvt$h4*i@VAax@K7_erKkQ|j)7YPYR+dZ!1));$J7uP@u#bVkbBn7BTyXn;+t8$?|w zte!VjZM`Et6S=9t@9@C&uKc`%DbXy~ICT=+CpuTXKl5fXRtjG9%N!`?aN`e( 
zE1};bh|e3pHlX?A_JKzH4=QGvZjI%@W@x%E3O>IltA9mUI$GPfgFn*$8Sg_wo@0qs zfbneahB%4`@^o{CyqZS^`)`*^0?roX^hH`>&4}1ck;TW5)AyfDk)}Ck;3jAK*#_L? zPTDxR95r7?&)KHjc01ok==T`Lix7>&S`l^1EoghJA&dPiFd*(25f-miONcOhn9l?R zY+EvB6zAL&Dvj|Ui@g^$=|pefSb68bnIU`b{icK`m~ymj&|R>^Z+*@4Gi_g{~F1Mt{0-#YTxn*on~Bi`%yNMNnQ zW%$pk{^5=TSxZ1s2|1^&K;#l!PgPf2MnOhL7pgISw^w>p$p}m~>sBKugM`z>)Gayx zg=^%xC_r!xEIIW=1s@h4w>>ZhA3 zFOe%=U?4KJ$>FFbEGaE;OB7Fn>DT$csp9EX<8*b8{g=1nWrzh>b@V(DZhKR}GAcza z7g&x-s@5-fR-_vi6c&xlbMSq5`4@mdufC%An#9R)P4>~iWJ%vN?J_sh8Cy3+V|hUM z#Fw0Wm!e?&o$myU6p}}WoNckEDW-7V1XXJD+Qosn^uQ>*ztjj&GwN8oCK&wyWhFRq z0RYV%D4YLZQd1NIM;^reR5zX0299V}h}rOe>AL{e__{5urGWzP9ZTqFwT?vVz3OY? z-6zB;(^$e4kY(~z3ZLj)FM3hxK3ybTQtvTfi@w8Oi`MeC3VjV4xw66G`aUx(?)!$2B@81#_+nFgW3S~x`+PoC#t+!c-_`VlIm&dF@BBIDc3 zRT&HtNCF|oB9I2YTBfe!dAEgG3m zX+%9jn`^XMwX8xk8D?~on>MmHMM7t{%;aGECB1@13c*G94>z`)wyV3|cb!_uKFeXA z|03Tzs_7g5Ij^QPD}vgSQ2%n~*PzdRI;^Y3r9`Z>Pc)t<7??D0;NWwt`M|@|+6HOs zA!rSkvDO6wr9UcEu&%V=+m4+MBHUmA8VznpC?Ng;-b#3s9d_@76R>=}E2^@5a*YHZ5Jtbv>%j+L@=kdC2mR9DyM z@_{>5&n@ggU(fWytBaGWM7+0pV)M_HG>qkppiGs0>}kLJ!&3d4Xg4GMx~5@13G-l_ zFPAo)U_8~Vx_~iiUJdR$DwZ20=vI0;R*t()b*S5Gz0Y77G#>o1Fx6%*v|q-i2Onty zKo#ICZc=Yp=W@`aK+s05&(rrHXoCqS_rC@!zHiZxUJ#O{9=r2UkgNm*m)SvO4_r|b zrl_HI9h$B@kb;VK1+08tCFut_iY7w!x}*m@MK=k5B8hp3^!`PImFvDeT=qoPs;#Sd z-<72A0Gn^>*@3!nhcWVgEj&ZTpXG4UJ3;5$TT6HiuhtTJmF#_5+H>f`=HECl3x2(4 zO=h~Sy==0ab7$e9zBZAbOE%Zg70^7_(1gH1v(7Zo=8I|6a8G%gqL5l$L^s+?T8JUj z?kSJ=EFjZzrhwBmX;5S?uQz$voXKTYCAc{9;l|g(?WUmj3LKof&cfGs85sCeW=|^G ztGy4z&tQPYp`Qru3ZE1`aX1GuwHoI^+QIj-(C=Vc6X+ke*fu7`TsuTHnD9WJmzzFL-S@KoCDjdw-2s0hf%GH=0?!f%)PLV!Kq0Ik1_a#R+y&S<%>Shw!y#NPu)u(_9Iy{<>`>q>@#lhrx!Cc- z0!If|Pw?a9q3lAyaB_ypU}s}OnyP3WmmkH)b_2J^mH<%olj)cs#msnBY%kO z<;~5va&?)?tn(IuHlP>l@sa}N_EPT*m)?d|u@Hrk3?}iutvktM+)L!tJ|tV!=1*3i z+nKbj9cN@pFBVoB^+YF)yLbFLthy z8?8O41Pf*WKB_m4`8zmz+Iu>C*f;|=nH}5`+Mn*8Zjg-^bWWf;{6Cf^NUI0|u|T)~ zkH5)Zs(`=RcR-^6?F0^ExE!!3g49G88<-suxH=IqIfbK#LF`H2Mv>hdBu; zuI@B2b3z^A*pZ!w|Ja0~=?m~Vv0~n}$qH?SIgKUO-Mu5&Pi>c`$TZh(x%w-LKelvg 
zN{x9ybJJYux;irfYns6#j(z0yJ|q2#Z0AXCbk&2pglLPRwGKgWO>MJ2`PS`)l=Fzk z2+{M&ZcDk2?76KNC`1Awrq+dwu8@Tq{h%H)x|&We zjlW9Y+XQ(K2q>)JNF8BuX0gUfstf;0Oi3i+`7Zl5%Omo#?*mFfrD}`#K3mW*XC{qz z;;mft9C_Kak>%6Va4q#gwTagZnDzO$2_G1K;`8VwE?z2FkoAm?ljEp26X*Ea$W3mC z;?|p^ikKZgXx1))Jgz4oz|L*S2BgEC&n-gxQ07+5EFYA)m4Sq&nOHf%<}&cOhAtll zd;#)^Xh0#>d{D$6UnbV{5!j>Uq-0_384r6;FiwnGesGqe_#>9B7{?f6^Nz zh6yWFZ%p*X?9ysMuG>jBz4;*((~S^v?zPk)i_w>NS~ zDypksJKQ|{)jje7#17cj5V24#KOk=TcuRcW@iQHgF1qJaY2CZcT-nJ)XVqt_A`+G# zz6ofSYEjI8jJHDM@pLF5EAnN7+19kH;D&TFX>~=Kt94qS^yZHwBWD7O*IhUJHB;8e z|Lb`n`_aB0l{a4oy*bvtTz)uK74Nn}m*F9*X;uGx{+r7|a_$G43RQ6&LU(}N_b*X2 zVWQ?Fjg*|`+6$k%JjK?|*I&Kudg=@*>s!yy&H5JA#AkH_Vz+XG=Bjb$_wlctOdkNN z2OUJmPJ+Q(8NmD;fa4dg1PXCg&{%T=w-OH@5Eo_xsrEGcp;6|T8QZwl$GpVhD>7%N zQQvy*-c!1!X(^$n2ZFFFpBNXQv||ewBW(hjp;B=vaDC?ab6WW%dt6_RcB2o`k1l?n zR9j@f+8($3H8=<_T796TX-UgwZ83-3#uGE;af9^~iL*;R|0o@8x&qVo&+5U^Uo=CP z*i|Xwg)wu^3}|{+3>&HR(B)5QrhO=z-nP#&I;M|#pA|fl8?o1+R8@0uN!D~TbN1~) z8nIKUoKl+X6FyMeZ3@}qr3|)p;TXu>Tns#MmOexCH3Vy#zt(=1l<^a;FI6qkcI`Hh zXdB}^T^DWTN=N`p1^FnB?F8I%K-jqXc*3>iVc3v@Bc#ePZ>w%+klW%W0}&kJLWLF6r)Mr8*= zJs_wcjjXEB#8kL|Wh%KywnN>sO27O~dE1ou40}Y)(r_|6#-&u%0qi3QyJp4`C+ZTD z%O42>Dz9fSG;0BpRo1hbPI}+Rop`{#R5K}urYRo7hFKUF$z(L+hI|Tx5>%kp_px^r zl%u;7!UrS(TKiZ6a~xK7^2bmUm&^+2*3F~&sg0mx+6ElcC4+^3@(2K=*-#ZhjO7uO zpQ6Uwt13~;K-}ruti=sRth-jE9_h9oJ_RH+B90etH@~}bOkLotdgoi>_$*Ii8}gRm z_XbriyR8FO>=HRI6pqO3)j*Ty@En!T_p>!zJ+-T|2t3RkG6@LfxVBV8`@i30Imkvra{3cgS4{&MF+HEpeFwJwZA~4ltJw!b}5R=pu^gFM>I;Jes|U z$-2%5HuPxU9C4ED?|dJ{$LovZW~#5hA&Wk5Y1e_7OY54V^6dMPmM*)r?qGOx23fz1EZxADm=qT?Z37_m$N3E0oz*Rty>>~dcYJU zU66#!#s-5O1^qmfn_5z zk2DFoBV<#=grP$n{msaEJ*iMZ7%S*ac&Vwr4%2Hj*2>4AFn zkIOKXN$%-BFZ+6R;x?c!SeGiH33xdjstAK|^sg zWu&erD zP%0f4gl(En4x7_}>BZ)ls&Kn3SIk}B<;hzIt)hZG=iwrB8850mZm5IVH^@Tp4d5n7opl14)yo@xS)MvT&NK?_Vzj{pJrLv3nG2MAew4egE z+eo`l)rBjg-fgQ1M9*%oxz!@YjQ(Yl z?OPwRS~A&wmCo-?T#Y~Uma?lsf7pouqCu=i?%4C&1CaKuoZy-^2&e0AZdmq5>>p8& z+FnspQ}y02%?7DZgszw5a>yybqsFrP9D3AJ+zxP<_OL<&Kq3s#RS=J3qq*A?X?);z 
zn&8}eTWP2$=BHB>a{)FYTGin4jW(LX(gM*5lt)GTlrH>KdigWQqAi+#^9MfF4(z~~ z9F%!M{(w{U*xD5#kqJvhgbViIc@0t?&vh*nx>QZ$o~c?AZ%B6_O3X8*O&Lq+Z@O<_ZXn`VF8W2F zC;8Rp!44Fu!;jB2@TxS`zVB2ELE5)$#)UKOD+^kmB8+V*C1puFqWlXcf~<*HULe&Q zB!%a1p2-c4Llwz$JiN0A%xOT8?Z?{;(gxC{FI0$AG1;c;8-9F9SLyD&V$;pR>)X#! zl&0S^%f(jd0b)%yk__r7%Z#Ai@xr8V6Bq7h^hv>j_JhlWgd#AfeMIU2b6QLfo-;P< zr4RWz>0R;HLwBGmT{i(asXgq0K(2%XP?g7{p+U@}rxi5TQg-$NxljCa5Hhe$oE&P_s_hkH9vE*|(9 zdm=0b3E#L|t!J7#NLxPBs%BNPpGy-JcgsJ{k%uDSV@-pbE&?bW@gOn04Mro-gKLGie5C&52;G6(0@k6p2bt*j6Mrcs{wjg- z^>*ZkGJZjaNKR80rXvdxEshk>San_hcMewycwYnq*~hp{b+TMY0>k~(7ax+qfP7yn z2a!j>e&lzQA<6XP9u0)FzyU~$&WENG?cJ+t6BE(Y@83Jr`^-uoA)uo!PYjR&70fdc zlSQBNZMxAQxE63`Z@+%=9;9UP`~$Vvv-lts;veE!G^`>%-gky>oX}K!wDL=_;7RGkYl-U#~Zt@P$gn_v7;jLLKw1x{lRe>!MGwhM2*T9)(woSs(A(gyBt0VzG~I zp}2&_y@!X>OoTNmc=i5xw2Gln%mEa%dF-V|&l0r3z#T>#GS5SPN>E<807MGP+!@?z zu!R;RF2kzlqr26W!ej4Qm_{>j&~SzaN^uqdZ5ffP_|beSZ*TTBaODDmqtM``DC*R4 z>pc3f?*1g+SKIx=2MYeagd%(|SXnR+=?6mDJj^KOmBJ0F^NEI-4?gC|tTSBMxYj6Z z<>9ne@J40B>C0&g904HFys=k3hgmIvh@$1OpQ}+VUSnK(PA<`&uQ!?koy`%Wq7&Z- z?vLMo$SGYk-tbl=a#C;^$TI+SKwSdqT9+C;6z#EVb4Dt_VmCLoL+raihs?Z+b`O*K z?9bEUw~~&utxEw+7dw~e){7FMV-6~_j>TL$009*oL^)xZb?)ZYKuUoZpmD2W+dIX1J+3WS4>%7J( zjMjV;L*}N8&pvn)JA1SWlJY*`!zpGw1D-kM+tl>!?LQ8`fk?>bB4G!jUQ>}zz`8&v zqGR`vj}5{D45gr^H*o!UdjR{So0pBd`H|=6|EU{VVy9eUKIY;E%D6@02j)T|yvJF^ zz^%-az69_8c6E7YV_iXZ7Z6TaNnLNm4^QUy6^y-CEjjg>c>P4g0KduLD!q4ty0L!b zc;2OY4aV?A%2XUReh0&L(&th_GjXxy7vdx9Hl*dpn*`ZuxK(9aGJ1>p`?d$Ukv+kd za^DA?h#EFd^&Nyq%2Ybe`&wsk3%A(^pVKF{%Kls@wXHIFY2Z{g19q}&h8}# zO8Jc+&TAArW*2Ggauie480J+GI}jVI=?8%XU7m)V8Bj;B1~6Ug`AN6ElmJ?%xI$p1 zj)g$^fER%;kek`@3-ANN4${X3Z3U8hK$wU*FpC2HFZkl$_KPO=PN>`5RuyxTugr8d z&abkX{~{s~=ZRace>QqOK?RJ>%kMeu49otK2LWVS)I-JQ`WnF zO*F93He)Ek+wXI3dyjM9WN}5iNyuH9>)LUVY9i#5`qpjFBNzz=NLxrVGE*M3uzCpd zkq%1ZDcx;Texa1-CW#)H*@|GMF}DsA^NOb4uO{G+CXTVBo;p?EgnY5maU(Q`?&b)E z{%reC^DWbs904jkrNuA$w?ie-LsBO@#AyArJh$ENbUuxW5XsfAicPaDO&o$V{?uU8 zEmUO^_&L*YHsMz_k&5%9lN2d$!P-Gy$zx}90E+?|&*1H+ZsP$$!<^JTkw_&?NX+=) 
zx0tbiqKh}Y25G3rGr=O6#YzPdL=yBtoLViRH>B3PnRIj)1StPbiV!}*c3_sIyURRS zTUQt9m*T>n@$-yio|Womy=PjMd6r=(DF~WV*pBeIVd&D6|Mv8YxP3jB_NutLF%@Az z^N(D{@3jV{X*FE>g48ryY}t3dxhodaVNK^VhkH2z6=KY1>G{HRchShix84KUp>DSx zZsEuYmPzdvh^yLcJUb?2x>5Fwt{r!s-cMo*vMPABNMLY3l3fFO#+m?}Q>?W{pATPN zC37|<1xd>5Flb7Gt=@m@H~zRXD}xmREPr58@PKJ~f(9Sl!PVT(8DjeX30R;HbloDU zd!BgP`jrKb__YmYEr=AU^4buZW774nL6qaGPOB5$^;LsBFvBwrBC1(UFNz_DmN=R3ZNDM|||Gy-gYaWEj3Gw!WlplVZl zZL2mC|C*0`jY?9OOY&veKwvKN`Fuec3U2N@-4k=xGZUmuqwk2@QYo?~;8A1XxN*jO2h5dLUx9RDm_6u)Fbb+J(qas z0tjbw*=iPO8rzx6?r(rZ9|fxC1a0)$LGc8xSCqbRJ$z=Qv%1Y%mDp_u%pH>nr|oY5 zI!Z(HDn7U!(znZ(=#Gz4W;yD5j@qxJ6>OyID@Rj3-grLNs6Tm~351Sn^jj+2zk-wU z0ti{MYGC`4>|tKLBYhO7d_%*?6%_d_Y35%dJWaNlm;yc%`OD9HQ^$V$CB7H1B@Ws& zwf%7@u)qG=xdNfKz`fz@0O}}Jgf1E?4}cv1Uv*F01O@RHP2DSLT8mQHd1yRN0U&d- zB}V@nj~{OCEUk0N?vBCRPIou3<5@`67r{ea?i1E9^F%&5y?N{OKQ6KY-i|QU)I#fg zFYS#tzb;DZaE?^w7k^2AQ=(0ab#8z~nOfch9@qq;z1VA<>)GAvBT5A$xsGL88xyf4 zs|w_LnZ=UPau65tV7rN49UGnn#s0d2*x!qgh8Torkq20?Dh>%6amiIeL99TunN6BR z&4@W{UXKm3dnog^377S1*v)@V^y7trAhXxa!xpp|EP<97z>J}z38WbUNQJp2;2)X; z5`ZVxTbBPo3tB4q(ZcUFCre09c7Is8DsT(8@Dz)mG6Wrs9TOY{vxi4T`{Kh*M<;su zxtT?M%%o~1TmRmo{Vcd664j*e4X<4K+I2v=N;c0edM90NoF&zn(Cb?Y5CuX3QJg8l zL6hqD6A8`{WSa#IR}Jf#j%in+4u~J)3M9N7%-Y_l^$Qp;oE)YvC&6>2fEnSmFvdFC z^Ssot@k2fip|ZA_E}$&}g(vD;fVdac4fTiMK`g16uQ#qyJRE%8f$*~9(VB1`8~_l z30D9|xsN$Cf|Sx=D@o*6hnfi!PDc}+{?G%lvYzfZ zDPE`o)p6y+#t&^rFW-g|b215x3fLVPL<;=e_ zHSw4U5mM$OaV4GvN{6y9V`g~VqjI0H$;Pd3^d2`Xb+#vms-U32uS>`(mHa$k=Z|T- zC2li|=rKDQOO;8kp&MiFR*%;Tq__{7sBAdB4*#PD44wVoyN zWs4+;eg)~tj#aG#)`_!?6Q3i{QX)MZo*Y>6=by3h;cMa|C)+YH?oo+sK=veDb53s<03v`rggwEzj@0d8SMd& z*#L`l7A(@SEm1op+6F+z?cnY}666E!-Uw)Q{*Xy641I@}7Y}{v6HBtbXGP_Y1eRheL}ke; zlKR}5&D*V6?n{ieRsK2#>BQ#ug)2WvKrlSKCK0^>CZY|4X_yCdr)}7(#olC=<6Y7x zphr!ARYOZS!Mvi^_HWN<9vxS0^f4ivXD?=hr_Nx(Dsm_AOlSL|28fKAJpA_nw;-mL zyA6f?Fk0PUz36ul3W^q8F5t|wYwI65JZ;De?EtAUf`yKCx9yOC>0Bhe2_F02q+`^9VoIa4-+?CE2F|Dc&!gA6#py3H7#OK@Fw+{^spPd8PXnN4j;Y&ZeWBcpK8QidQ 
zk5hykbrV-iV+Z7DnlQ_oG?KEjrzBAi(&?8a0PC5fd}kCpPu^m=_~-W*V>WYiEHCSP zlVz;0F&Ll(Bbq#i*EZvyKtodo?Gv*^y3P)Mslms{>6jg@DYMEz2AlDVqPY%Bd}o}9 zt|xE&DiPvATp!K{IY7RJcZ8+CR$h6!BGmsY2sncWB;H5$BzPHiFJ414LR_K5D`Kp- zE*~DEL647{{9Nt_W8Ab&(R<7`Zq+K0wR;u}h}6T$26hCHmJ#p@=&v^ssTn|^ak>x z1{Y6TV}jo(=wo2%<&}dCZ=#WWS{~0oG*d{1Pm*D9UcTALy6wGQ%_p_nJ{ap<hAl6FvTBwof-J_$c!d+7c3k!3;nGM`=cng zvqd{tBe6Oo2*3mc?|>e1l+F6PbfU(?+?uA(K5)F-id7b3D{>``-^xNzI<2l>@^=|4 zY6}bZi97|w2wy?2-U+W==alzhQ6+aKPjs&Q;1Y0k0~yB>?>vfIQ zR44iJuLG!AxusD$L^4J@T7bN2XH#?|z0IUI;K-vlpZG8dgP4>H?8Qsn9 zU^<5zj&B*+g;+Cm%Y2!2G8;^bFK3*Gd=yx(r_T<^Wg*icm1aeBzPxgQJOAtz-eLw! zt_eRPD)ExUUhQXc3`KmvP|kU$%lCryqHoDzBAfIj2j5cjB;}lHJcf`-K=fF5~cH=bh3-uR)t6$~vzgso!jtV*f zimDV&uvBKcdGa`0H#gI(c4~dcK96_auY|Noqq3$r{leWJsQhx-4mqo&!Bc|OMK1-6 z=8Fa3F`7jWfff31&HZ0dp_;rWh_G{p``Ea+VBccDC6`uQ+C1`U^+R!NtKyQfwsgmCF1+92NuucDBjUK?{)+T5$=l+Yd$nf6r(bKVw=NKKMDRzb z(94nXWE9KCs^aTtqeO?D6Ys=Hjtk|lDLCbO$7)M(5+%_1^xJqFK3SD`OP=QPpIeLKq-1~8O6J)A55zlYq@6Ly6fF*(6?Z2O>|1JsW z<)U2Bp3c_r|9u6$%Z!ikZ$+PtyRRUTuO#-(0sc+58>@Djl)W&lNA)dxS@35)OMj~5 z2M{waYk3AxugPO;ZPix!-?<9HsQ`;bcs6US7Per1iBde2{((I+m%xcBSt9oD_ecmZ zS=8fdg>+gKW@k8=XhxzaFSEQ$>ibmLzatGVwPosK$oYPGT0N$K#wgpjOOh`~*fOnl z!$K1!_~mVspKwzVXd$yb;*nh?$bpZqbuaqd*oh`#U1*;i6IcI!2AZPZuP}EjJ=MD^ zs<`>}$-t$?y~D%kX4b~>^!Rm-)^|~0Nw8w4BHnzEHiBNOV0*Tp~OC1{25Qch}kh8Yi;7d^BmkhXFaCSt1V+EV8b}X;o z$JHGT>OV-|3GQF+a^Q*&XqBL~eH6#}XGf%s+KT>jwUq3)QJgMAH@>n^U1;umMaql; z(huZK`a8@X?tH*fJw3)?suijBkQyuvQiI_RbW>0fl_;&KslXX^+!E*>m>3$iLQ%a& z6i9i2NjVp-joK-_8DqV5g(c3Vtnr3B%O>twsq#voCo;19Qw?5=5x1rr|2(hxo-NPp3Z&i)45uh*sq!Z6j;HhCs$ zAPi%vgoI(mq>z7lNE9fSy`>n>{sQicpZ83gaiwPCb^cNZfqC%pZ+yUqbwhigZGGU@ zkRTb0Y<9QtFn71Nv9$&4SZmO#baOK2Kaz^IU!(w6TQC@IA(>dFpERZpATF>v=l=vM zA{EzDBSt}*pI$OcfBMy(xku8@tgKfL9$ub5XQiJwz&Il^#-8D@`{2YWzkHoD+{6hA znYf`JTa3>33CU-g5vX&yZB4Rhm0#*S$ySUYxypCv-ORK15e#}GbeW|C~i3u7jO$>dsMatCwm>Dm|jP7iyK3l`Zn*eIqLG1~xM5>fDY?HqSiq!`{> zBb!!@-rqz?A@rqJ%48y9PyNM(4m|qJl9;fm^n{xI&unXSC 
zXZxm6y_5kNi@})qc+a)Hixpt~yLrH^e1KdY7z(g+f*w39;!^GjLAioAk8&y8#`OE0|YYUJ>GCf;pD6WK_UzO=`kq zy~7pc5TsS0uiaOD8^U&`s3v_iaSpwm=Im2uOzPy*9<H> zNaW)ddY6%EMM=V;)gZP}1^p2g#FiTJ8m#*Tmm)k1mLDQ-vMj;!BZgnG zzNyXkmqhisrGAshoBSY}XVwm8YD}6;~cU+5z&^ z^%LR&CtS+zL5|4(8V3IAw{vlG0Nw$?{|BZE^}~+^g)6ur)NHJ*5O5IN>VN>ANpR+4 zqqU*h>32swoT1y5oaruub6DzsD*yT!4WZ%A-4q7Di(GmA@q{C~9K$ zHsD5N7ZFj4`3D~Yly+3#m~B!&EhjG@FfqX&iBiQs`q4)04&ssqI5eE4gl~)O*xMO* zbH6C{pL=e3;gJ+SUVQ|KQnib=l2+hz87AB)O$AxJOvj~9dZ$_B?+Gv$z4#Hceg*B@ znqqocT9vzMd}d-*f8m*)3QmhA&-wkDr4_sOasP}s60W?MDV#fxc1mLk zu~^G&o?s7+o8`Z0mjL2M$%i}meMx_WE@-HJNMQS=`4g&}0=xw5zQF~%L&m{!;DY5~ zVm#&n1Q#_BA#MZrMq?S1e{^n%sgVHo%|t^T#YP6wwV-ySzLX2}_A8hI(LTQd4u5UC zQZoB~&}a(-QfLr*cqE1PWePb*$cs21EOxMXGvzk3LSJGyt<#HkXO1lrchi@%#wJV-@ki<2Uh1so@FF?PUl>MI8vr=8hvP@kVaSUe*Y0a${kEJzn?fN+Iy_D2@sm3c;i z>pj=6me3U%hTls$Pniw?Gi=G*!2Oa8H=)jt*7VG+9qbqU5VF%|EjtAf2+H4X(~N(# z@@WqlTUDra9yax(ucW^I#M;lJ@y%3KOs?icy)tEl^q8^vq+weR6bc39Xhi@7!Ha}e zq5aZnpk6uBHVuGKs8c3*kb_C*5@V(#y$f z6LGcL&)h7f-7;kxhiu=M`;PGJ*A*t+%P|%8DFVUabDU|D^4->IG&%TF^pFvg)2B4# zJpv2VyQNRtu|c;z5at1P5X9p?7K{P9{XS@58ag`6;Xv>M*2TdJ<_6p`;1q}U(r-x^ zs@Lr<#guR8C)@?TcpPI2R<7&k7D1$;HLKiMG3(*NK>*X7Xs3AXxrcgYtdvCW?tk~C zw>7&Lb8AVy%%xxO-kSz(QujwriKX#tZ`cwq_QqgzI4B&cbNk<{KV#}<`WjwdtTY09M^12Z5)`s;r6+BEA8O-mL< zy{b5Sf%eR!r3-?4wp=Ur?>^$_+&!Nq{yjj;h4u6DcmGD8SteUk^gXZ+qTqAv9bQ>W zM+Xk&c>uq+J4g}(&3w41xG=D#{^7&Ikp$oVd-(R(mmnyb>;bA3XipICKL33c;y`qU zV_$_Rgcm5YoPZ1L=-=31fnS*KzrO&khiMUlAB0ae#RWoI{1u0Hh9 zZukK#DNKH=i8)Zgz*T|f>fo=Ieed4Ha}mAze4K{gRG#Hj%Hj(b=YNV>7n0h2o=m3Q zC?{d%BbsI=(SMMT*GdvpPe%(z#oyW&NkN?KLoMxI^yljbq z0E7&;z-l?@aJ73iAyVSpLBpOz0U`qRma{}6j^F35h>B=QAs_O6YA|E%CZcoO%}^={ ztx=$PkdkwG^V69s=J!#wLe(8M&t|x3(HV*csrZgX=<`yRA@L7han6aj&pq0S>3T!5 z7ds#9S?hpg*qu7%WnDH@<>c{{A@qQ@N8CxtPK9*g1)O`iwBPII-NdU|{~upx9ad%b zy?qcxK|}>q8Wj)_4xJK8cZbp(`p{hh2HhZCDxfG`N{f^rQqm=*AZ>txh{|t0hncVA zyv+N@8DOp%xvuSgp1t;3_x-uUr)fTJ3>w>IvA8++YEx2f-wzYMo2}M4#QF4!*}-F% 
zsST=V^-fop?%tTBBN!>hcmIwbc`FooD=NZgUsEDj-z;3M<-|3l781fL_^tXwR>g$y3q0X1Tsky~JcS>!RpJG{W?K>J}5G;702#fjJKHm89z;XQD5`YBgx~f$<9?pxZ z`0K5^FZ~z#rXY880eJ(5mL~F!MV`*RYv;S;*rejsW^f9)-SL^w;lLg*^wLmuDzHkIPH0(YHTQ@LAc!2>E(X#(JjBgv95jG$3 z<7>pxAvk+ojtXkJ53kb^dm9|f2&=PB%rEl=$&VSU+18i!GKV?6Xsa$bCgUkpA4;31 z*!(tT)~Rm(b}`3lHlbT8M2TQffFN_XIJRDtQkS4|-j#Y8Q;f3^v*sU-?*89i# zg?k46{C(e%Xh7FM28F3Bho^<9vbLt224ahk`A3hoU*qlQc&+~=3}R4q=KcG zkXX5@MbU!$s93H;)xIDKf7V-95`P4O*g2k%r!G9e1!bVA0+q)V#f28B3r&5gkE)W} z7Y?h31_0Sor~Cv>S?6P^Je|=(vWDzIoAgIop^xW8vt_!}Oy4rjPx`0kDozb#w8efr zvaF@7?Py?c!#bi}H_6|&X*)i+ma9`)lzy7ZmiqN%Wi zU9zlYZ#aQ!l8yy(*n4@A9CqtXB!|6ftYwD0czkykUXoRM0noor>kfm;;J+=H+&nN! zQ-%ywYc%+Qv77XFQ&O-}l4klA4P>KbjA~HoZ$5aO531Wn45)4=asTpQwzMY(%~?I? zZlPB1!Z+ERt8AccCJ1bG_pNktCQpXPc5{rBCm)#UsMr|^+UiTZZ>5jWKT)rBc(O`% z1JQ@4V2RxWm26moqxGZtQ8!r@;UcDjaihrPypP|`suQr(EKgL5j-z)alB>58of$LO z!0FUw;0VC(6)dnjV8cp(fwPb5!iwE`U`3H+8g5PqeuLn*Au3BnCxO2$n4@c?Dxm>D z@c37?^JniGar(PjJ6KpjhLsbT2;AK;cCH+*E+EtT=Y~UAq5Ez)Q!qn0K%vQ@C9Vdt zOUS_b7iQ;GWESpQLr3tj44F_l9V8!<%tmCE5mOaNevT%FA9BSv5D$X(w;3OSqm20P z886Ed)LsV&kBNMb?j>6_33&G)ZU-=s#0)T_OcDK(G+X?0 zcN&DKkNH9;1n!nc0zD0@jM&$`Vs7gWbTep(xPl5FONg>VnYp_`(-M1v{?4-P_!jqm z*0od?VW~n6M>`hickJ-rKtCj*^-;;ujizn~t2C69>>y4>4r3~~StA`W8@O+1SsrwM z2(%Sq2v0iCIyxpTs}ylNN{(Mxj@`SXPJ^E#d*F`k*wySa`8tThhliAa<%O^CL-%J( zG%?lR>{=ozrHZ10pkIvQVu8oAmLR3MJ;^k(V1za13!zW8c1_2m;fbv?5wN zTX!CEeN064n%xN1aH~|(N^McvtJO56jtQ5g&UY@O6>79sUS;;Y01Q9nBi5tg&yLHo z4~V>EmENQdc>QvoTJ}EU&0s*0d<~8}R2;nfd?_UGIbp!18wVaT1j1GDzZ(h&oe1oA zXctpQz!m+~a{BW?HnHQx^#yUoEuvA9rXTB-gPiH07zgKc z&SvfqbDY!d9&`mA_s-5P^B*Q}&rq(&Kb-j9uJ&Jok{3?vLCaSx8QD zn&f^X`QYbm294E}!`Rql*7b(=uj2Uc%qLUx=d6A;$|Yr+89ZKh#w8*qgD0|3jIH%k z>F#b2s0LP!z1=*f_BPTC76wUy6cpI!@O5)D1<8QCq^1T+-u`cF8_!Rnrq!)963PT8)UHcWTbNIA}(Is&HJrt+66y@{B@oeOo=@ZtxWX&bnR}L)zpWn zUWBtD&CEqoB#ujz)Yi>Vlz!{w%z;Pm-bwnVjSyHF$i=%W)~|GU+$#!gbqHY0>X-X; zME7F6E^X&|l>(+Q=HhI*<=U5N%SH-}jIIZc+TZfru}>@Z>a!)M=?tx0t2KIG&TC=h z@erewJY-??Fjr{quqI7pQ>E&$ibeNlQ}2=~y5!R6UQ1+h^i`ECbgK$XJ#mz+Gb1rj 
z7r{vFENh5!ST*(NH!xUy`M^4PA0E7J9~KW zWwhI*b`a)!xotl8+VBc5b3C(udWk=aA<1D=C0w=94aS|~PvwE1bMb?Rw@K%nm$y&h z2@&3PzVAzjH?t~^G||t5+#+|lvpc;E&mKR%^Y+2TGr>p`-3~rzlYO?Bw34)vxCDo! zIG8$JP|)Lq1S}wyqW+?2!UjXBVW1NXcYKtjxU!ns9~B?=-zbCf7j4JxWd5(3j|F}w zKi@vT(;Mx8l#)E$9ICGNh;Ih@ZNNo!v4C$JZJ@gRL%7KB>w&zX@qyE;eXXg2#c<#- zNo#`=k68tajv4Ka2i+o_7)+u+LaUi7Y#2+0FDkH0*``R!F*YdrrkRH4tuH34go#Lr zagtIjG+_dK)BvI!W_u8L=V9za8>7}HW{i#Q)7rkB@9NNae$Q}VT;mr_C8l;3l*7Sn zEu~aX*7OS4h965s;CNVnH?#_>Jj-A+_R%w-hwV~{z;kNB015&Z9FzAhs0RIK4$if< z_cvtb)i+8)@w}lz}ufG5I;j+@NiIakL{s*>Zl4D@8 z{xcOu9*$&epeb|^r66`565OypWkjJTt*k2b$9-7?N~6D3GXJcIkUsyu_d!~-2Hf9a6+P7<=X2Hl<6#Dy(>my$t_ZL z>$voQJ@KWgXLqhI<1BOiB?UG2^HjO$Y%3l$inlnHn`dVsvY;M)HaLBHm+t$y*^-8{ z#rJLu1cT)nTXOHa91?cG$>A_VnId2JUK0=-3r@H4heh#Un3R(;Zi%`KJc$}i)d~|n z67`-CGIH{8FdmJszQa`ZIwAu&oqg|TDn;c9`wpD0yGeeJ?F5?!DDNE7CvG=CRQGKz zjDqnf|M?msdG?h%D;jRS66v*k659Dd$BD_H>fuQ^f>HJM93T1m(&|k=$vVw2-?KYW z?>$y8*sXDfc{J(_XKFKR^I(i78ivhs7wv_A&AUL*2bvuzxsk(}c0~ASq{rtoS08fr zvC4oYkihC{kRYA|?LnZPqziOK&lGR%(jgi@V+e%_QxCie%Ykq^eiuCd5MU`df_O$2 zy8PT6nx2r`;A(BGqNHZ5q^YG0ixaDe2>XJUw4W52b-(OgJP>pzNupdJAEkewox5K! 
zE6^b8(Go7}z-i!XUGCcbhRn*EGpYhIQ_O>{iSKMQ-O-&I7^c~-;IZ)`xq16zZ*4^r zZ%JL1W!2^S(!fl+(pH8Gg_;k{ZiBVuCBGUC4kIB!ijKFA{p`sr2c%TbN-Ks|{p5}= zxI0K48NpSEy3|-+T%~Pjm3?TCQkeNZ+dAcrhS}7aQ>SeS{km+84Jcoz$A{yQ#V6aY zM+~s}*Nba3@NGUAnDM#9S`xLHGPheXN?#G4DG9#b z;?bLPglsird>?ICh)o^Gr&i7B6Erft7XEi_dxz*|XVM;Bo8lbW%v?ETQ*+g9A&}14V}wnF+}nzWN;=w;-ZN1tc@SES%n9~0T@a`tJMzrd zbp56+2#bh~HV=P1a`)gb1npMvwz2qfy=-_lEDWN;+V{={904=XUAw{b`tNV|Rs|pw z0A?#Kde=n$E&;F|xU;G*2W~rK;qq|dx@P}?ZD7*3zdrS&-!orh`HOw+q;U*q-(SS_ zZ7p4%dO)9YO-VW^mSZcU>BZ{o$a>tVr(4oQ>%2k9M(2FEOi3?1ALQEQtyH z)@CE)9+S0f`eJzok~Uy5C$~MW)f?S6M%h$77BeWwMf3*qtF5L#-lJaKt7GE1PFYZn z^bjHQ`I4#+z3_l%gET&2@Wn0F^HY|#>RJDP}&2`+GD$dkP# z(=f@>KIdbaJVwY06UUIDh=f|9MVUnV5$2j3GJ~K+QmTJgFKjp9)(Iqvc3&B}p%FgH zG5XZ0agQrn3TX~8Jwm)uQk3I5!w^|Lzscd*HM^MNf=lup@;#)Ds@!YivxdHdUNao; z9>#sD=oZG!!T`A;1cLZsD5Q_;&DY)CmdhlxU-6xP;;*eo48sN@gO%CWI|`R{P@)6k z7*RT^Nung>Flu`{2s|N>H{_*nV&_lvnLnMqVzGhJ&W!EkmCabJxhQ9$aIa@4v#|u+ z&ZU)CNi&)EQcdamC{Nwo;C1lJO6C!yI&WXm$+SA_))m9?tCid1b04XuMH|r_W$O;b zIM>wV$pUL_dTn=RDb{5@B-TPziQxe%cN%gxMSH8;oU<~Ezub=!y}zV1dA}{6q&!i_ z+6;g4WpTWpcKSK>FJ&M6Fs6i7iYfSKHAnO}S3Xw;E%h7l-rF%QeMYcQb~QU&HKB9HcG5w=2(~C zice5hwA$*y_7DkrLu?)gh$Fz}K|IT(cJocL{j#c z?C6>?ab5c(rHd}aLTA+r&j-lpGph(qzIGTWwNgp6J~c61xRxd}rJ2u+Qj&>MWUq5b z&E>oIC8PaXPi)aj$KgoM4d-`SzR6L%r+}{MyLcm=-&TnsLJe@zCp3U9fHO4CU&yJ|Rm<oU-HqkJLl6>pI_1` z%~8FZqH}2HN;yT6K{zCJzY<_^)3@!@>b_r_b$xzM&j-_K5LIv0o5~F&O_yfT>YaOV zB81k0DN!S{kc!7?s=$9$KVUf2Cm}W4wR&qlPKYgu^EkH0GcxBFvk`DihG#pcsGq-| zAScuAOJI+VzT3?v#MFOx^RN{G?cQNk0f*Ie&Brifc$P%qv#;_I%WAcNL^Bt#s{>9H zIun0|Ho#2u&n6>0m-}l)%&gskD&)!GVT!FF6&--+0gC?8sP<#uAg0xl4lRb?Va8^C zoeb(n-TB5QQ1o9^07d_b&(~jHoaq50$R;-uEC*Gs zochU}*Qal5s;EZZ^YYA9sw}dzp5(LhI@z0`pl~!^eKx7HXyX0$%Lu)SnE7K%KK#{9 z6thnTib&g-$B);0-u4_P7jPNiY>Kw@)sJZ~)oFf_eepo}%ZX>-hi^>w|KJp7Tx)TF z{4XQz@~PaLHs$V0q>uS<0-(1;cPw7otMu26?&AbE43EzJBFMZKgn^BQoN4>Mxz*56 zM@u3qQjlP~If9_i^54eC$YUvQ?Kxtlw}t{KOXp0?k7M~$Ts}RNprBEL=@VpXLPN3O zNMDr0X&C@_b0gqx(F8xi=DWmV^(66+CJ}R&M7?jL 
zWSE?IXGn1;()t8Xe2SOyw&%5H(;rfkzz|BG7VeX$W;3bw=&6jo_JD^Extd45q}}(* zJ1&Z|o#kKW!?p$Waz$MSJ^Btk0h`6uc!$GXERVl7RH#^m_kv&6v9rC&+2{q(y7hdI);l;i#NQuP{k8at z3|~y$Ek5Ed9QuHtTE>NYnDG;|8<<1R+66VmL|yC-P98us|1x;5#y3sZqq z<)-7m8X@P$br2_jsmd)VbJOJVI%#vKIjoj15@ zz-pwWTqL!f!MzX`W^NsZsWNW+oOawY+Cjl8CAQhazGzva$i@PdZC6?ap(%tDmW-!7 zBs{{h2Yi1f4{D6kr!ttNh#8mH?M{%ig%Dr3wh(izHSdup>1*^Sb9}a3&YP*CN2;Aa zDcZuARD5Um28>B<4Z#OEefoAdJiW+!Y~SVZFooO`R}NcaP%~rqBXWf14Wlimer#PfZeVYg6{tpNz9BG>Yc~@c9=?Yb zvP>hbr@2k99`*vke=2+l&O1wZs<_mUELbREPpBzyG9vi&L+6WL6P;(ct0+r?|{1C+YEly5qg>Te3UnvK36~WUa(pe`);3Rha)gxg2`GUF*N`9h?(2j z1MEmauUdu6luwc{(h_>d&QtN{ili;4&|jS7Edj`9>UmNaAb;qnGqQ&KcOePeK?}|0 zT5UtZ{5%w87|qxGd^bbgp*t7t&&5Y@tV|{6Uf|NtQOcCKR;HcT0Y{Hs4r6+}w9{Vm`teQNV85_nIf8r+|`| zR^&nHXo_=aNNXzYw5EQ%L=!PU)8 zr@()ozWqF=u9uW5!1Grl*-BXMW1Ngf{!^zWt3yJPwSQF`T;1OAz)hd`SPQdQT{K(H zf0ZJ`9Y$slT2CgFteUMQ!OVWtNPcX1D}xo{xCt=Pn30|qr_|$uFZ`9Pu8oZ5o-(LB zrFzQUR$+qw&4<7ubzA#og`~D+slFYz?S%%j)njPFn@X7w)6lj;9n5-)$naxKZfMR! z0is-iCellqkxjTNv`9k&8dK|bmNRmU2qvJ5IHQ+zEzi*g9{2a4F=@_ z+!9j`ISVhe5;lk#6vxnGf>t!rWBMh)WYTm;)J3BzY7kS+L zCB^kv7(FPL)Bc)mgAz6bjI4CgU`~SGt0RYGrX=6cY&sV@y8PwNa&F~5@&P$Xdul0=1r``9QM?EvH^U+O?kn$KQ2)WdAunJ`<->ipLcHf$ zkHYHBs9E%X9;p^mL*fUH-;5)7Ap9{6#rpLKM1au58;KMPh_ZCw9=*7{21*jdYpxuM z;_7NxjY{X<30UH#c!xqbW_U`w@C237bzWs)bLq)$J~*QN^H9Uhm={1NIn0KwNk3kt zlbsEqdSgV+#ogP7LloU^q5s7HaQa01k2X*K=E?W7zvRu|&SaaR%`aRsO=k{Ln7=m9 zd`~?!pZ{xZqc(l#I%VJ43m3RQlML8DR~iudh>6Zobv&4Tu}(WDFTc`#Bq_>h#HPS4 zz+HZHDZj5gxmKC%dG|uY6ZW7?3k5v6ly@vUI$pZXZdu}4G?PuI`?h{4G)40Z<~E%c zBf3(7M*zu%?8E^U)z^OQ<^vJdhUo1Vg696)_Ww~U(86NvE8A0WwBQor6yi{F5;sRX z?d1X)BdZ55>VGKMyCZ3;!k$U4o>1IOToDX=$XmIW+T7E85=m|DpO1fk{qn~4CwI$p z+kG5L6=BSl$2fh`h9&J=P8!{QM|i5f?a`DJ_gqW)4dQ2Jr-Zjx!FxB^KlW%b(`j;p-L%P%4kVTyrVp)|9eKyyD%i#pPHT z&OW>lI^qxBxRKE_mMuTC0DbQ%43QQjo?r!^U3q=6n z^LR61dp*NZP9KkzhSg>w9&X`FD8X(r^qJaT-i5rqB5OJKgebR#fuf=PgE8i>>{U7n z1@3vulPQ~gNB5Fby%8&5{_$#tGwU>2rCAN*n~wOb+GY{KZUG8aKID|GQ}=Cb<#&`m zkGOp8gLXBAeZygo5D|c`3y#vg^?~*r9QM$j 
z!?n8&;?0saesU}r9)X{i^s{2Ax)tdN?0eUD535Va%F{(^KZc`p>&MiHq=AX}c{Ka+Dx@)Vj$_ROgHY(Z8arO#4&&GQsm!R z{{hp$8i@HcYlq|)o5B6%is0C#Tmq+~XCeOaun|r?V|Br&>IHzq77+s+HtTluTUa1u z1S@=LpIMcKgA3Z8!xZCUjnaA z$RSKH;Nj>3&vvL1x+8=oV}sOQSmn!iA(B+N6o9?)^wHNbTz#c6uQ9K1xntNm(Ie;* z@0$8$1%Na^h>F`nO%m_$PJX(*!XNyAKL1sIW!cljBFf-eTXVy)oTO0Y5;tT1?`rR| zT*6*(tgIQAX968C9+&LBOLs$s=tt+;&X6*yrMYkvk#F0g%$cyhy7N-Ut4Wx@y=~g1 z!u?48f#m08^KnxDiG!glhsE+!b79>`VcmWg@Biq9o1$GY!1snzPDNZ+MiZr`i`0+U zWA3j}$}a(7{!&S~Qgl#8^n}c^QhqB;y4;|jx!G7v+w zX3R$3vX=wlrs9v9(z$G$a{nXK^#^R#x2F3Bq^7u|6ND}wU_T90eWyg>$W|xyU%S%~ zClBiPZK~5(LtntMT!s%X&)-|>&wCBD?A)v&aTWUE2qOVHF}4^EEq74WVbI_%`R7#y z>51<9{)bd23pYsLuy(Y!Kq8)yAB>S7kZ1RI7UTq_NNc}P`El#?@`HkL=lWQ}AyG!x zr}dRJp7|IXU1NK1Z%Nfhs-uujiWqflxifn2zTww06WkytArW6{rl4~gzjbPLIiAVV zyy?Q(t_Bi?#B7$KS!T)s{#-Gw@{i+s^+(ZxFICRpN6qWjp*nlcT#G;IeQbbKZ!FJe z&=1mNG8QQr-=>V2((c}V8)Zl+E=`^M@z{&rpK$lJvqrwYi} z{H{oa^pSR53lem_CYt3wS<(iUT{**wx+D)xrxhGl47m=OPoqAGEIrO2W~_$`j@9ZLxEzQB-=^SwFW*@$5MU%6 zQ(I~KhH@u#@njH-Qa#yNRHvZP#;y0o}3X(C#iko31B{L+-1JLI@bAcXy@3O)Css;#I~XD!<<_?9GG6{Vc50zEeU^5aBsG zn4vLTLO>kXQdW{Sb#!zCAsH4N6Z_lEVXII{);?bGRDbifj6a``q78k~dW;+Cx|`jAORQZh&PTe-;Xzmm%9OPp&90{fz{!{vKi{c?Rg z#9G0EIPA2DqGy!U6mt~2u*K7B<#W2IE2>lr`zK{p3x~R-1Xd-BNb<;}U+9Q1c5g8CwDXu8lIx0_6s(kn(pd3ie(fR4)ibg; z2T$IfP5jU&x&?`&N?_Ti4m(fl+(FoQ=u1J~)@K|J*HZSTcP&3@@?Nsc-IU_eiBr!( z9eHH1x>#zq8s^M+ozZ&IS)B3tLoIc^`;n9*2DJm1diBMY8FxadCZ8k`zN)s%MN{YY z9U0iNB#W{e8&nwM^J{;=Le_wqFj8loHBmoC%4k<@B zWwaAm_uU-fi2fJPw_7Pgu;^J33M`y=s-qt={1wm~-Au7p|3Zf!g2&)SbrL$+CuJg$ z-dWE^wYvqKWr{|rUGl-+TgKLY*)N-<)1=P7dn?(L1PHq(Ji_0S*)}Td_2s&f|bW+}d7uKZV5N1fGP1 z8+;SO)sY6;w~InhCwgDirxLyIDVKulYDfv zFX~-u(w$4?jxbKTYV!6L?6u4jk+QYZqSM~bcLXa^n#rTPI8~pR<8k7z+g_azY?=MM z^a+Q*s}k;5LDZoX+GVsIrz8*rvVEZM>G2~L=K2Em1(n^Rd$vbC*BT@y;5|JOKM_0@ zos+2Yaen5zk=xqvS|Phr0JS@Zx@*lD1BeMq0?Gg@+9OaJCsoKe-|=TMpRxccr!JjJ zagX!;t4cj}WJhGLw(}lf$1=NjtjTGbqW-pnnk1`AH**||RBxe7{Tgz9LF&ky+bNIsZmraL-lzx%Tvjn)@ 
zKg5U<(4C9Dq=NfGhMI*WLdH?*c;xm(qEnfzGC#HJcbr1n^Hexvm(#yJR($P=nB(4ihEfSV@qdwdYV~b4eXdj!@0||TgJ*uUuEak!%9k0k(%!PT zo)oouPv~lfl_~>3>{b(!=3i5NaNcxtWAHE_)>}97)R^lx=*+X1<~bI%cwxY#t-0R`FI7FuP)!p@T=%KD}|}y>&CRnEnz|HI2wqH`Wh^Wv&6W5>8kTB}&Ut>b+3a5R;^Lb?g^D&zP>AQpb|?AL42tZS=1 zz9POxqtq?rDoQ-6PZ*BDo2(#j)i-eMdqZ`~T~3l!&-tlm<+n2>30t2$KFWW#MoEDk zjNL4?R#VnBed)FN79|gS=3xGth2(A5fgHVIft0NyufOB#y1!!2n7Rc%`XLb~*aXSKH0erfY=Y$cv&*=iw}yk7an2YSJmPlu=nE^y z=i!TWYUZ_ZZG3-oO%YwlVi#5;Mmd>cB^8l!pP2!Hj5V-r)%Tyay7%jGzJs^PhxB(Z z4IfTqjqoM$Y1a<=Fc(yoke9KLFgi{=&vIU`Rz$Z6LR~b+A19{nYF%tcQad(eO%8f zT*$FAh885dnxnXc01fGe{zHnphj7QbyE#12X5dG~ zHj6-o^3NOo1^5m7lHp~<#Z_cEEYUzbM@~?<$s$S(2P7=g6zf`26SisPJi&NXooXT{ zCd%G@g26EV3%W4T?v_0Trv4n;^&kW0t)z7pv2S}RhTeBoV_ zv}it{8q(6qBiQQBW*B-@L;mE5c{ZJ$`61!$xiC!M>D5s4itss&1cL;P`gJ|lhZa}) zl&34#<6OJFFEC1sYyNWI5i$N8uAe3`j8=G@rXTn5^^nBo)1^U^)jooA`YjfdbTyi} z9cS24>{pFkQ1H`Dkzca>qp2@ zP0DdQnY$^Urg3k=wRCGH7LG*hZD8NwiriI{=2Z(N4oz!mFVtWDh`o53-zdC}$VtJd zXCdj{XrG;J&)_c1Ph@@jp_ zr5k~d3~MG+twzVYuh+a@Bu?-(Iizpk7PeGr;%&;!re+f6-jRN$(0$Tc>y)u#L$fSk z7gAH0ySYAYZos`gEZ5pktcB>#Og`he2Zh?t+MX?HPe%-iy97lsFxFKvzS8=hQtY?g zK^=WT-Tx*u!^4f^M}#5;_{lDs5VIZHniD1twr2LGZpQY|L;2&?*p(7{ zySLAe7eAm#;xGA~QO-yq!uDQrxXU}egU!fd4Ccy6P&YDH#s}k)fWAj3ksUZkM(Ha* z4R`BAS3H)0<06}AN#ZLTdj%!mTC9iGa}E3awiAihmF0VrS(^7w7SJ1R+xT7Ip-#OXD%`bXxer2YxC&9O{s{RdE#2CTCfkMzI{2&$rX+Bj; z*~aYZt0zS3J$>G10zNIiVvKII#erm9+Kl5lq}2txyY(PU%sd9!Jv?8+2mYhVKrZRD zH2W09GIr3$^X8CMkU$m)5#gPXS}Ix8YiI4yv0F)zJSw_iWbSQGknvJ(CE>*SQPD3E z1+P!LTtux}jQR=lfPfq_uALS2ymTbjA(R#7^<%Xg(JiPQ53dA% zP=4Fnq^m=MpYVC47v|q52Us{?;TD2SEHEz&JTG=`urERejunYmFC<<_H5_6@l7B+JMcf(#M>QwZ(*8-= zSC!)$TQa;lx|0IJM#n{Iom{h4YJYZqE*GR|rpD3rtUL3ZMoc4x*2m;^&gJxr>`Cn8 zUDmgm+;a79;~9t|md&vDGwWhfySpWDSuY}5{dKHxFcfh&$-rmd368m=8<5fAly(8s z{2x0{%F%4k=ep;k`{$XANZI!-o8j5EaPYB)q#p!b@8O8Sx`+S4^N9<*0d(WeYZO=q z#OOnU8VTaQc%3Yic`7?y7u1I!ngp4aDzuse5eRfg#YAv_FlpDheisjd^NiuwKdF@#b%cf0Xr_$@S)7w-Va| ztsD!{^BUJ5=v6%kF3z_R-b{Vl3t)-E@14$LDbSE?5U~Xm=nLdU8^Vb!kw0$5QA_s< 
zs>35*?SqW%H9sH}=x@7KNPrD*DA*oW!e?O9AuNS`m&C!*8uD16k7*95K4TbYU^S2b zxY@@x`CwK%?6}TUW_bdvG2p_Vjp|Parj2? zkiM??^Vfndg2y80qyTbM%b=tk8b@NKmfeb%k2mb7ZKXyznc!ZuQGcGYwX0PM69c}P z91MxqU$#YI26ts6Ba#8xT^M`JnR z`f2Mt-m-2YfiC^fZ1F|Y=TT!)fFiH!QVY=&p~o;ceDntW{^0TX-Q6`vHh73=VKSpb zV*h_dGcb)DIIt!yX{bfq{~8j)(}-NKq_H3~*)YGNAk(UM)_@iv-9`b6OX6AB+n8dD5mu^;j4DozSY2}jsqo>Qlc6>FnuQ5Wz zqWtt9oL^@qD&)M7_}uQX#!96qcqy0rW*{_-jN_`Plc^}eTUJ$zRnT*u;SL^sJj%ty z&IiUt&cX%jk(g5StR+_XSbdcWnvw^YBR~Ozyt5z>5q@~PznLK+MRQx!>f_e+ zME&xt*4VreyzU%?*p>Wag0>PMlM z_AerExZ$Dtt)=?!S`Fjo^sh?oEc~DQ!nG{jeXL=C4~y$&;RPcYZ#xU@tpNh>kP8YN z32Io1pLR)eW%Oa9r+5pCV%c7ta?$opg59z;5~_nOiC^eJtTt^uwe@akqxn5qh#*(X zfxbJ(8c(nkIzCO`ni+9zTq0*WE25D*5Sp->AeuXM$KsT5D|Hx!dYMaOcE3~EoM|hG@_nMoYgFmA^?sbNp%$HzEa(pfy-UvuL4wS}>PI%RTB>2kbbSIS2A zlLvo%n)!Ur33o-h;T(i%Q;?()${&=4-3EL2vpZ;Fqf3(s5ZU(8UDoeo_0YREz z3tFUJlR*q^@8WKlcj|fSmv=NoCYGU`*#4rkfiVf8vyoYDG$F<$5IzJRcW{f~G&MEK z!6m0r)xW2I<2D{D>2&bVUtfb#*ml4aj0pK2_OzZ%CDD6TKHlNLv=YD;mES*?XmeEi z@?OaD94dDboxY%NKRy^?*DkFIjMHYgS=EdsQ7#3N_2UXilRlj5jvjv*EPpo zjTdBKJSV)*nQV*olx%~iB=b$``r={#_A8`^+66T}v>d6v#0;;m_Ej`PaNXwa4Q%u) zjH!C=hH1mvAgA2EcpS(NL0dr;pd=sLb?&%)iWbEY;dl$;0T*Vh2t+P7*WPo6SbWFG^laP=-*EMiw8px;XIWv#`uE z3w{sfJhkWy$rX4=x2gk}g45S_Z*0moKkI=dnO_Y?gdircBhG zz=v|QL`h*>+`LgL5GH92UsX-PTMkWa5GtcoL2%^a2{v7n1lk;R)zQTqmaiyQ-1Y$1 ziJ&Fpj;EIK-8LkyWHE1z4#rr#G55r))xD!D-TM6X?u0(QeUsTy(PPKocc*`R_T2Z4 zTxi1v9@hg|>UebF#;9sYHUU$muH%Z&Q!A=_vB62jeyyyN&4XXPheHOdo7BnKipo4c zzm3mfEp?;IRyLRE5%Xq{S*Iww$sTfq{o*UWQ#S31EH!yjS@XU<1y_5HEsQ0ca8mHD z%8YIgp*2zRd+8Z!B$w4%u5ECI@uAkdxPS$VIA7(H`%iT|nie`&n^(TQ?BvwVpqB8* z39Xs5nr?N}6cJOGwDY;)Ax(%fI$In0?&PJbfumgHqO4=dF+V+33teB{72N$b8WaDs zl1}EsJ;j3$tzn)2(*y?=Cg^9oLR%7T!vXO>7Iv;&LfASMkx0VHS^iLy>6S`*Mu4b` z{v!gL^o!1ivp^x~>ZF8KNKz>puUG8u_Z`$^=#Z@*Tk34jEymwCFZ5)*rdJ7f!T4hxy+R77}Da{ z2-n{%%Q)Hp^??s%h*8dmt_?3c{gKL7f|A8cZcHKEYG*(mx$?ZfEHH>%5tnQe=I+u) zTfEaRO(=Z@)iZP6c`D7Z9BK<&PPx#Cx(Rp94?`hgH9KJx~2_|xTC9Q)aR9<_GZs_X%*+)i6oXT zQ(7o#pCMOP6enY3 
zUdgLuE{(Sl#nY-vEK2n8!RcUj(n|T-fwP(bq1K-(&E3u-%FWq0W4K_lh!^j1$y=ge zC6>+P%WDXO>m5D}xh(6u)1RPzyN*|P0}rzp0ZEm(tgtk{6(s*y8gK!j?A_tw3L6pO zo`Wt21=m+gl(@2z6pEXZ8>~Q?YTYUj)e&9NUy#6pDt#hA8soF!(AFnvRXS z_K@rBL1cv{k75sS4UiSey67@bpE`p|{u(csqL3n!q<1yH$T5i1bb&zvAO&B67W^&L zmNJ>GrO-X_nx3ULsACthlrwQ^1G%dZ=`&hIC;}h43~a1 z&UgPZ275Qy7j`29l3rKj9~_qMHekZ|qX4eQhz?6tlLQi9AEg7S?L4btDhAh>}uBtJQ39=LwzSdzL4`%JO zfAkcjN`ym};ZyIQa@7EjDrCA0=^_TC+Ku@V-q`e8>Sjy`YBLdQEF;_Hz(1>NrYXw?|b|kr-%OQ z`aOX+r;)6%0~t|RMozK;XMiv=I*cSkh^%b z?BL~ZIh>~ENHr!BGjV+i#pA1%!ZK>j;znPu_C(zL?Ocu(+UGyajj$l1W_QKsa>A}x1TZCDgX#loY7X~oPt3YVmx z+NfO!=U8b-CK>=&1p}fr7;|i!IGtOUU^sElj`mFZ0vY%COe%gl2O39DlngG{W}i8) z1q<)&_QK`b8i8_kQHJ=zq984_efzG~?&kK6$Xm$F8Uv}Yu1;vPznnP#yD(AU5%41; zDLj5Tl~SBX$n|<;kxy|@>($;)*?Ml&zyT(p5YmCJ`X-_Uc9$bRB~z9$t@@!hs&>qf9u>w zeLdQ?H$xNDkJiOIyptue#-4I@U$MPO>@VcyBGy&h7_jG^^WZOCIWG7(A{15yQU2^3 z;)Vf8C<;4>LOHoOnsXpO11A*Hmtn_1fD#Dwr1}c1^(RPBbwy7ucIoc9f}VB(FlY|;--|L}JO&pv%6H01BT!@g$`+T76=z-u7pcGN)crIP(_#t5ot2#d+QpjGBp zF^vOaITpKisG1Fk<-#U=*tHwqszXowoKtNM2Z;M=lXVeCKcPVSvq5J(PWsRh%VnQH zvflLtg3g*JvoLBui=Qm!R@FAsx3|^w$mL1|gQWr(EMMw_(YcbX%D0)X``qryTF0+5 zys3Un%Bn@@W#={@$Q3vYo?6pqJUnC(J`K2XBRGe+-s3W_g29qi#L>BCpbLWO}(L0k}vUZ&pq9Yd074a~Fx;ekbTS8C(c^1QGx5`xMz&q3tX|ip*h; zadCmZra8*h42~vO3|2w+`?}XLN*t#TG-SinTy<|7e9sT+h#w zzm>nn#Z~g+K;X-P&GDKijrxt=tr;d^LXVUhP{f`Woli!MnuW0yIt0q*+T4GrQ`BFi z)vGtoWWUqEqi}p&L;>@#PzgO(G&td$zEB`GtCy=~--`RxGtmE&;A8RQZ7+Y#BnUBI z@^NKgiSqbAzRo)y>-PQsq$s4U$gFHKv$Bee?2M3+E?oBIvS*6ynZ0R$_SOc zLPf|dLZbYRH}~hh`+UCc-ya@4?tkvb-Bt1U5)>AAHJe6o#K* zR;c#NPd3MFQZF}D(s|A0yOmKB82D0;CSvQwwmjwuox!?rDFpZ=-+%g+3o_l7 zTPR9powRZMm};GYEgAJd;v*^BlPc!9kL!`6zeqD(mVTo*rm_a=MWWG*i-xoX$f*pUSvW!8+fm_^SnAhA1CGXbN z1nB~%AIEwZdh}=;86^qi%n20k#%#;7UP&U}LspO%8#VnnN$yv%Y`Z#l0>n9by5iW{;(TX@V~P={wS}3#fE~H9x}~#;Z+9| z{1sI#1;PJ*wjlM>femLe}L2}^oj%_*P*o-?uL^W^MYzp+2+_C9`al!v>@pl zTezwHiNeVYbzLhYW0+F&Q4DPyEyrd_`Ln!M&*!9Bp7LeEM_#MEm~mVfa(MBXiTh&L z=lguwEnP*{i$9?^eJI1rx5fsaCED>E}h?GD6Qb1PdoFuD2p^pFMq^FY{g 
z&^N|SI6wR-L=uMRs((>4NFqbHF3*>M&2{p{rZ@DiUy(IaL^d6L3u(oCM>fW;jeN#Y zZ>+YUYnT!XOy)>Xs^7mt!a4n~WOoggBxKntWf{i_=Zd%(l-Kmt-9b$0$@^!f@Y|u= zEkau?FG=awYh5Qdo}_2hObGRUX^d36I7aMQ_%kx?Rd&aor1k}W5FT=9c0o33=nITP zeRQLxx2Q&ZLi8km%u{M=w%i0!+yY$0yiq8n(qdn_hh%&YNXFIUx|5-qaAqPi^aC^K z&0))$+E}AZAVU=ce0NhU2IFF91udZec!J>&b+odF(OcFZO1v|dD4M3}N()u^qD~!E z>e99p)SVmge1cydZ$6T%Y;fo8 zdKl0xT|CdX)bmPW^n;Gc>DsEf;M2*)Bu8T(kSR^e%$7)Sq0LN{Zb(?=KemaBq`rz6kijbLOyk4H1|azvJU75J%o>XbSRep0AoV7W@4`>4#6tioh@WPYNA( z$BtN3s+)zR2g2I@E8W1!)(T^9Y0s+#zyIfp6^(ro;(dIWX{^{(SM_F1i`^NNcX9@ym>*bKhyqJ7Up` z5;Z#Ev2)u!^D;dKFMe^2E9r6PXkO;`008+RUN8M+W^DnfG)%gPLLui3tkxN!F;C?S z&!`{z0vMwwaD?pr!0+7$aQWsEE&e=Kem)=x76VZU99WG&lMrp|0wKG+Ch*|dTaLdU ze@&vT5OCcNDzk=!qw*V3&ybk(hfgDbWK>(xYg;dfoq2gb@og$XdpM;dI>Q2t`ci|j z5+aLR3WE8%m#!#NLna7F)hl?w{CiqcbIHvyH8!R~Kqsd}SY8MO)m#YZw2)(kWI+() z(aKUC_4X*uPo<8j9_CM~0kQtx zNC5H`KtgH@$qh{GQRXQgkANJY*YRK>E*K)Iy{cJo=QYePyV zrBSJF1xA)FC>tXm&GA!@*W1IGgn&7gfI3@!_LJ@>m$1J0;1Gc0p6?Urtqqyu#}^>! zWD(;7bQ(l4KPDkLR#36~a`C>uLAr1HOI%LtptgUhL68gZz?ZTc8faLKc2LU#WyTDy zQ3SmSt(Ctw(ChHy6jKOf>Y2QL#cSz)882*w~4IwUgdBujhoQC?rr|z z(^)jtE8h#eppk&o2cJvC?K&O%*!0&ZI_Xha%jw#p46Stx&jzN;81lLCwhP?mAL>6J zeROT7cw$GX3%t-dhtHsRhUP0f#}X21rQ!lP(u`uK+E?Phk($s?PiZi^DQPK##0&&IhTBGey+|W~Sbg<<{ z*+YLWJ?an4H*v*O*~D1~yx~m6Pt5WjD0PUH)BT`nNQ}hB7VTth{cd|Pc{{Cz3|;`J zR}C-ZiQaiJRMc1|9AjW3+_|i>c_Bh!L)D*3M2kU}u+UWPIaRXgkGUIqU}Pn$oo3d^ zrQ58|%U~WYDJeN0-dn3Vc}~COupH}zNzGD)!;87(LDBOa3jGm-{g1Ac>yEx7F~X>@ z5k_+_ERH+pBOlAG}PF^qk zggItPO!7he$#bxy54$rTZD)Aa3qx>U&l<5kh{)_(J+!%~_z7s{n8UwzDGTD@6&u&? zh^qULA7Q-YDVoRB7w8+c)Jexm7xMNTIjx)tDpckBmzb+@+SY8H>AA+AC*nru&SB@h z^EcmW(r}Qo3arxf6&kdC6MkSHaz{<@@uK0Fm{FC!klb}tVY;iH4o|*89*xCz!HRb3 zwc9uCJUeV@SUZ!5_ePJYbC}~(Mx4Y7>Z#pRi$Cc7)`d?_-CDay>_a4tdr8#2ih%lj zvr$xA{BG3O$wQYR)%ZPcJUmHp&-~j;L2Ykm37iZUUNfwV31rYA8x*E#@SfUdltEnu zD+LY*cY_8$X=adl>EbciB6{q!J?7uDr_T}5VfU)yTHS-K+gmlEC+pTz-!<{CC@Nb? 
z)=CIDi7LBRRgXSBuiDiwWctouEhkgHJ74UCxUUZ78e%Xj6pBxZ?jbX!~b0<>Fi{ zTQ<6JBaqMUUYK>;KxF5vl2u3fH8fY2Z9n&H%32q*Kw0*VcwS%GcT{WpWXFBq8wc$2hlnMi2B+^0QlqjS|#mcG=9HeZ% zcW?>wc?vKkoQ=G7>ICJ!W&5f<|LYrR8pDQx#({$bsRQOLGKdPkiG-saDHQLBAXHe05F(c1-)na>XsB=^9 z#|QaJLN3-BZhfDgZ)I+;I(Un!e3-_S878e*H=eh*jL5Y$>2Q)u3b5!aW;u<_Xfi6B zmY3x6SdUwJxW_{e=^>IpUf2Ae1oD}qnNe{CCsef_=W=or8Mp6-A{6NRy}au@tMXZAHu3IQTMnSY~!FlGFKhi<^YBGou#^#pfNlg2pLOl3mZ*ge4 z_eM+ENhA#rmpO5#Wk9ivS7yGVC*qj_hrjiMi$iHt?Bu=ColCd#79>}wONl<8&{})9 zE{}TpgdW}8?ce$gcU$L;R$oNu3BP$-1NNF0FJn?JR4L+%tDnflZVxf#pCICkc=G_0 z-mHR6$}8<4=cypE!VTX8Cy$xyop1D8+f_zl5_JHaED`Iw1S_x&KeRLltb(bn6>xy} zW?qd*u(s@g@kjl(T2UWNedZ=1`!F=?Z>8skt`cp{n3Tsm=JYdc@b$h(Pu}kP^;$Dh zOX5Pe`Qvg+ACBG^BZFDwRV>SGG_D~mWPNmU%@f31^D9N-4E7RVKL%{l#Gkq{#u4Ip z8?deMoMx6>1GE+Bk@b;0wAwo96&sE4-fdH=^Z1l;OLA|kqk1ng5iB3cy(O15XF9%S zUC61Hp~8JDseU45Wwp@YCIf!b;*9$@j3U5SHcH<{cTJDQvGLg)CIOF;)M-}rukU{B zl3rroap5Q$V>9mcK}2v!$m)Tg%{414TQE3dfrE_ZRh8D(a#6)j%(Z8m=<}Tw4aMPtY8;?SmGkW21XXI^XM(kvR1xIDJN1}xrwah@31&F>>Qkc$ z!f78~ZF7`qC5f+maJRfUWNLW`@-*!nhryKdQ$Jw4Ll4%|vkf>Ik{>_3K1tB<3V}^B+yUUw2el8o5un)R zMED{EZ^MvT;89qj1D}CF+s8UP+d%N2hokGhUia@R9)h(SkjqVL%!d+YfDc1qvP0F5 zo`P|g(b#9pneN?{wa>1ovp;(tB$<=<#0WObS?NzIlSle|I~hzulHl|{n9_EwgS;=E zGfB=dnanVlUbj|hxw`-H+uD+)cv}#F2<@-OZSlOdoh0?DCmhq4x&*tgZR#9DRnc3I zxms4`g_jWKFV%S3YYG;0Qluv;+_Xy!%4F&~67>bvL&x1kIEebp2d_wJ){zpyLWrRmPU#1T8CHiFz7X`2Qt|?aLZVuA#TP6l4wxSXyKJb z+d+EvHKaU3F8#~IK#T6GIQ;g+$t0H(@8mY)&*Env!S7Dmi-0~?XT}BvQZmn_j<*jU zT!NM~G9H!F;}SfB&g@=INoYx{Tr&e{*2taIY#WefWvY9%+*}XfHyVf&LK&53S=Ahh zk15ef&)LM>NY2h<*SM`;h@80QdnfL!^2YPknt(6z1Xo0Am_7pw%GXz(Fw)`S`ygC< zm%XMmBMzi=9o>0}K(by*8`k3{JA@%V7iL(Y1ND6vxP0KP43(rV8hI>pt0LGQdt1;b zI9u&0wXW^e@RYjW5|~YES&!A@i8Cj(E4F^S`bOc~lcS*2B6ko)jI`RNp1m(tX2*vV z&(lT-5Z8eFV^w*4QbDN&SPR6F{Ld|O z&>RbzsTKqJS_bQSLYxok6X9;U7(&0ovHMf9l_+C8r6~q2JM_A~hdafO_QmPAZ}R1u zLqB{bM7$@rM(OeU(5GG&QoT$Q_B#e$LABw&YVZWrV87)ijQzZFr)Z-#W_kmVfXwqAPZ%fTUbx56}HThXls!vl;E5A_^@W!#(w_Jbu zIPIwPg=(Qe3c^bX1~+r`Tg*t6aL>#G=V&u+P;dfs?}@kQm{=R`BiHsnC3X{W 
z;Obs4!5v5WBv>9PVd!32XNSZa+tlu7`2w=`x>Qw1dYndS`>Ep%(`FOfx4&|IEvn)c z$g}&#sbxWx&c4=b_BzJiL~wr9XIxsOP>gc{zxSF9hupQ z_x+Ao3ip{0{;Y4jlYE2;j=-ZYOjzaoSHA6CNw~GHQ}r*A-H(Lz z8455xR<^u*^LGsJfWfcwUtR)r$X2w)T6oAh!kFGp9by&{R>GfE_rLlpF;-A+yP$YA zEUcUaAioujw*DUx)>Jghu6B*w*=>OewBoA@xFOl$h7|Jb-oGLJwr2Lw=1nr+2Mbj$ zD3q}rza?2+#~W1C@M-9zo4@Wt>=~*E5A}WT(D(}qTUmi*O2c+M-Jo14!kp?})$2Ns z^JRZ^RnJIV35nW>!#?%Sz7{XLMN@09x;l+N5vR4m+fN=v*M@2{npsDviW*WXF9NY%GkU<$^BvG5n`|EC$KrX zJhS_9<-Wf{xi8SW6%|53pZo6`T~n+rsKf!Y3D`D>C%K@opP>53izJ-2`f9Zu!sCo+`-ll0}|XUc?+sbON3&OVx+`Nh1CL7P3 z02Kp1JsvUe=^12Y(tn)pr10E%*K=R$(0pU!SrYBS%NKuroNK=O&`-frWcu6mrI_EF zxN*=`*p!Kxgyllswg>i1Elo^$O-y0-uLY}Si$-#J_s=;f&7qZ0lyBNFOoAR`D%-X` zgC`Fe#Z%hXA*1+IoyC_2NXD2|{pJJ$f;Z=AJ4uB^bDV1S2o}UtxO{16Tc~mo&VE|u zf11%D<3_Vuv8+k<0<9}fQ>g1>E%8TgnwQ*Oe;;l4=3<;K8rqbhKFg_n)RJp`2s?Qe z?OU#rGgIVujpWx+{fLQWqo7Pic`pA1Ji_NMsN)83ytC#ozviElFF45F6T?ZmXq;kv zABNnzJW=Bm>ca`sUnm9Cl9;3$gg-Z+A`Dj+5kpqx&ksy>iW-^y$0(Xa!|b2qA+!LGrcpu*5Pjp0 z)x^%O)JnY?XEtA;->Nm)jZy+?Kb0}%@mEKF>M9=H;R#9LewD}RLVPiJwMJ9&7**Fp ze(`)a%LnhaT7PEejMYgyZw50I^WC^wVQ3eG$>80(Y?EQ)kvS}8eYMuR@7%m^crmwu zX_ZZ0X(Z!MRD*&A5ewr&R$&(x3iU_;f{D|g8wbsDck435)o{>Nzj25XQIc>Vlwlg; zkGO1u1G&WSZ;>z&8LIBy9fHVBPDrD}6Nn1@0PBG$XAboLJDWi6kEIo_nv0etoI87) z1Q{*wV2;nQeXvKi(`xT=Dstq}2#pLb^yzWv^LHH3(}dRbO{Lp>x21gx4RWe#7?jIJJ*)2qdGhx1< z>|Ct|wblMHd|+Ql>x!bB0x0rKG{H(@g$C8}|7?X`+Cs9*LKYQI zLKrDeqZ|TRO85+rC3vjNEU;%&|Fl1;L)_(5g5&)G_2u<=Sx{na+ctrF({i#XzCK5& zOy1cer*v{;M9p5Cv2ayKduk=?4(QdDmvBc+43^g;9S~5_D8H_}bGpq*>O)!}$1W+2 z>T`YWbf<}xl-H?)ZYkHI1tJ{PtTAG<^D5ukZZCHdISrE|wg3k8SB0%(WNk#0;k9S; zw6iuS7raXja97 zM%ziw9olmMPm+cQuoD&EXH3`qz21q;tckb)g|~Df9R++>z(rY+7W#> z=~s%xVaB{0u?BjLYCOD2^F+ed0$U+qbZ9^E*t1oC;8{w|bX^?x1QTaVm*NvE<*kjX z?zKo=`rc`)2jPCUsk!ob*1kquuWnttDkYftg?3`SWWoLUcGu(kMGNTC)8_-7g&!u1 z8HY~F@*1QSYC<65vf1nD``@!OlU1CbB}l%q3OgUF=wuQrC+6|;%zUVE3>tEg&xbbkd4qwQVFaubR8E~t@Plx z*(h!)TJ{eO%f95iT{k}+qWpBUmHyg>=EgG`n__8ah^tJ6bR4ap93D_ zz0Oa~dOtLe3|9bc8uawkDgEdonM1P zGTJpwj)wbjM=SO$kON-s8ol4#hi>t?8*vDUl 
z1w+VZ2bv{Zp;c>*LAgSP60Z{)W5taEx+eD?yd9JpFv{6;WbSE55v7KPS^(HGr*wXD zWo6y+DVX||`lyT#U%!`xdTq7h!@OY>i}k9M&>b~_**B7FzrQ-5&wldHuo5Q6C=&6X z?cTnYQWT2E^disRMT8;eo?at~%2gG&BecG@YTB9uh8u<* z$^#2ZR$JPG@tQ`z*s~G@E^sI;a;(&NU%U_##NF+yRx_U#cLl3zs{cH&Lijaqm9$V@ z>H4rzUq?3M1v%`P2d~K6X6s{)xVL@+8fZOZv+6yEjNk=^3YEqlE?M!h;J7T9qvFzr zD&2PYbXT1;mc_wheUs+WlOIbWhiknL9SY4Lgw=3*@#jmdm(5!M2mZ@W3ubGA5rjtG|<*!U)B zT5^M=gPa%07@dOoo4zxGfpknY4C1BvDQ`}$yP#FMNDW#7EgQU;!(1ZV%vC=Qc`S7< ztQJ?Qc;FoKuBVtJe5;&RAjs~Td1+K2z*n7O$uiTzv4Xe*t$nRutwXr6m|2E=Cn7# zeqvF%W8NdWE+GM&xh3;vE=8D0-Qz;qZrLno@>h(V<-C%~O|rJVc=B_Qyei~j)a-6* zdXIib-XqQH&IMn5!iD(*xz>%;r1zD zS)w47$)*?QO<&o1)-eJ;lxtO*#Gh$hz z1uR!QzUvmg9bGWUrT!=R{f`UK-UIx(Ao={y@4q6ME(nMSA0Qh&@S&rO^6*4KA3+Cr zg=T-jG4?c#0Hg*0+;E5%nK5q8^HA;-(ck1&PqBsz8B$-l8|Hb*9-+m~A3%q;iDjOFBFK^mgEvJL1D?B$%gMi=O?K`u(FD>W`%{Hsith7)&aM zSohG~>YF44SCWX%Xd#f$uQkv}z{}h~387QY*O>}Vv|OH3xhO{|`EcCbA-TWxLMt>&T^sLKkJqpV?A3lP+v#+z)$Om-1p^T<5eUKRy`7`1bs;h~L=Ad6o1RC6#{bYE_jZ(_SA&q)FJ2O!5iBwBx&X`}XD# zEqfJYKM2=`xrCZ*~z8Q$b0wh zksOaC_lmbY2goxWuOrJ1st$**8t{C+x>mPY4}!D{sdcpPLpaPs5+35{B4{P2P0n#|b84j#gkAzc>S^jGYuFx0fN4?%q&XN-P zmj(V9E1Y5=Q>!{G-bR-#lt7=7J4+dAXgMC->-C2Bz!8nJwTYvZJFmK|>fd4_h{E}w7h(8Pt7_eq`BTNH`>Y|cRZ6i? 
zf+FgnGihXhv<_fKV?tQJmRa=JA|vz2oW!Pb{7@gRyYZ1-T;AGQdVa|W!L-Ns&yg!I zN)mBh=s8(?m`mF+V2sl4CZ&FUo`8o&T&gMI^Yc;+A0_4Q9}N=1{J330^nCb~;~8`3SNO_$d>sK3-|X0a5K+l4t=R)5a6{i|Ykfh&c!ezfsh&yFymsFYofA5Q9Gi8ERXP*pZ)#3~`DEE^x4%ZF zr_FJRYwfjzYzf*2jTn;XZw?7gx)1lOqngDO)Um=J{W+rOizBTL*B;(KW%$lDc7NY> zL%Nj$ki+q;G8l-8xKLFd{5{!OU~SEWpj8P;aK?XeDDMA5sc(y$rlxrl?cJbPa=xLM zI7Ch3Fa(>v%hdUT1Kb?NuithzzpZ+p_j+$5(0d~~c}{gKhrai8LX`PX^W3r7%qud3 zMat)+lj4s*Co;a+ku~g4O#bQ2Mn$YnyKABI(`l~RgtgZcGNVd$^pzg zg;Gr>2=>yNU)rKO=7-fH@v%9K#CB$qG?Gg=@7^5-<%Ad>SRmec1uq?30Q?Ug{Eu7C z4eJ7edmA7u@sVb*NWAu?`vFOjHP73Z66b9yUs!3SJSrxT1MkvKkURzC%^$UtJ8)6VP###UosAm|Y-Fsk(iZ<~pTs36Pc-rhTz<}fEBq5RQR`m% z;Azi(vOVI=zOZ<&t?+~ILbv>578@K!OT+Ea7Cb$PHqjMzhrr5{#z-(uAYJg63X@YE zlm#Cn-hZ`Y>aUH`7q0p2GPGEA+hfMi>1LA3RU4klW---DiK&=2h-5~mS;xut5!t+B zFTeiUb*wSCV!XUA-g}6Ns^psUO*?P2@i77gJ^V9$i8kw8--@%ps2z?y-P|*9k;J)a zT0Zg6BR3Zn2K!@3u!%x{qxikrrCr`_rjHsHw}!AMlY+m&svs|x16Rc!3nMBgUVAg{ z{mt^%s=&Zt51)1mUk0QHQ_S9pf6sR5*ZIp-x+`T|U+|hhr26`4p)lJX z92K>hI^MiXjWbt1936`q3p3hzy+8NU8cL?QD(XwA6GT`0q95xnST3wb`D`pZ?c9(<2(S-8!d`>7$t{;k7^eU!41)#r!JZo9&jOK0O&{rm*Zz);0YdP9>CZ2uh)ixu zJ*ek7pGeW$Bp4=ZGk|keC=pTq;AGuBGF&5}f=>nNLU^S0dthpEi1Ov}V#& zi>N7DZTUn%(VZ~#vmDUg`A~FMyS^mD8YC~v77Zu8DJUH26F)^|r0CzcUVXSh2*H~& zXas#Yx?Hn+3AE=5>B#u)tVeJytOz2;I51)aAVL;)RxbO&K+Y&98*X!FZgZzS^5eb; z6Z9BJ_A(y+?a0OF$EZ!?FO5uR<_eNq1^@z}27Io_?0n^2YQnP!7%9>0q0fx5le)rBMO#8{oPw+VBICJ$b?w@%E+ZdNl&4B% zaVUmg!K8wea>2LAC&IE{Eh*jh(sUyyDn4iZ<}s^@fFc9%y!#KxITLwVfm_yqe$qm`~4t;fwWJ?QFPt|Q8#Owt@UT{p1<4_-N_PQ@N$ zrur1O+w%&lYc+vBNAL+1>fyyA~&6Ue0%oED`kc2ZO0^@7aIg zm`xV#YGtQuX@%Tgyjs_;DysdDcdR>nZMJ`b=ayg8<$LrgtZKF#sz~h&suV5zYltcZ zz4c^oo(;nS))h|?Lhkcxbb^;xhIzKjl1Yj#Jf}6zGZki%kenF5^deL?k4N-W%~e@t z+jGgN1nf1;^u~>unz)+0Uc&gie(uNe5dHIsrDe?Sbf6Q16^UyoRyJbmrg_}wT=q>> zwJc3NN=#)|MFWkhL87V_lU(Sf87E^y>gGniSGdgIzs2oXzu>-8b?xk=US*1)3jNDi zkwN#i{#55x$#;vFn@$i6bzH$x^9=63T)yvblP{fCk6UjaZ1gX;{=bUMuVI~>F)mJU z@F1qfy)2+B>e5%PYC;$nx11gHP~dd_vjCv{x9y1NU66^eqV`qkE7IKZI>>hX2Tr)u 
zb05Pp8X#l!sPi4>^jHi_k*R5fmQp*VXV63$!nAVUx>fJ7vi)LPtk~+9w?0Qw8Y@LHO19A%-?dkt6jBKj5+mkN7n1=H|X-j_eZU?jkIaG z%j##}+h;tydeugy(vCh*S?UAbuejfQ5g6`tt@vlkS3;*qzFVh3?V_pPo(gk_ey#) zm>4Kcwcp{$goXEhO`xCEs6K}RH(^Qk| z=`yxG*Jm$^)#?$8#fGNIkX&Uh48|>cCC}GrJvbmkr^mK^v!viO0gvlWG=GO4O5hNb zVro!|eLnJO{uhH-Lj9vykrB}knWM|zE2qOkIXW@T`u9QzTU zb{Jwze6PvcvZG>NYVHuesP+TD!!z~Jy;4<`$R-;0r^a-@+Ss0zUR}xFk=zb?^3ofZ zWk@~lYCL$B_6Q#bo|e}3wrDqSJ2V@<*jBCLpPn2 zM(T;R-!~pJ^jtg}ggR+D8!i#7LCZ}j$GEVpd+3xynLi5-FvN9Ua^Hm4peo<|2)05k zEc1fA_}JVrHUnOVLxz%q(0`E}M{#<^{@OK%=k(ZV80~0#hFZX)AeGpGZ7JYeS=l+e zV0fh+oncV1_mKXFVA1wu8e6tN4*&~;gu_1wLexm%VO03;xsJ5KnNH`6IE)q(vydm7 z*J$waC^y~ODIuTr#SNa7q0`yyznZp*vA*4lnII~M-e{s6E51prXh5#$hi^Ww6d$$f zcd}wrOEf-nWrrUNRe90oEWX_5{FVsQlmF#@R79S(uV{Gr;?cI;^9t98Fah2o+VY%z zt}e=|kAIYmZ}-+ym^a7aN4*;|*x>qBAM{1qUuI=zWZHp#%kQ28t2?XBs*v7aMy_iF zHq;E+yDuSO$|4?tXDZ_c&cKq;!IB)9CS~aY{4{4zq!>fnVqj`xW$K7E*)xK`j`?Hq z&@pTkBaE22gky&_H4QeNP-@p5s{D8dF>`(S%l`4RCOB$m@xBaE8(k@Wl4NbSpm5%Y z3g?ZT;)l5UgQJBL*|(m(A(dlMlkz%MBdPuAcyg#5*O(Fa_5A6u_X`Dmxi%Y_qwDiG zpuz4dz=TCUr;_*INljfJ{Rn>a?#X#IdVQ|$cWM=}S1y$?MaZ_&dD`?_LI{o5i_h`7 za(OL!gb&|A0be`oqTD&osxr7mE_6}lcB^b;j73_`F)RG={Uj->TdaY#l>13i>)__P z*czP-YeWTW^iQ#1ovg@!E1`RH4qn(=RU?3QOZL70hL)11UF7SNN{pCJ}!KV z+4ej&n-WlvMZVB0@0amFL7wS`$m)Ps)9J>^CKq}VD1FuhS85oh4zW@3&DsY#7@6wx z{W!7e0C2~jiy<6mxrEOq-xGO3O29*6;!d7!?Zr})qTJ)Un(Xw-!&l(!Qgzp;q`$#U zhfb&SUc;6eqm9JDv8Vc!J8SPm)y(QBKTmB{DXbfe=m~hd=DNUTkx)e4fc<}EGjK}hp$zwW&PZ^d!x_24>y+aQ}n#V=Itg~ysUQW zXBk>JhUXxU@k8R^$>5Lj{|WLw2_Ze_?!Ek%vO^b^NJaSoP2*K?Rau&$(e{wpCJbpt z&S0@Z_7s9U+nbIeX+6kw`KPOhk4NCYFUNoFf;j<&)Y=Ng&ns``2vh34Qsj@J*l_H# z9B&*R9Fs8C5b|2M4);gYAT5HS82ZNV9*JK_avsk)*-JlfP=8EHG{cd9YlrFNNjq~< zoKcd)eU<;)+H>}n5eT)vbrx7o{Bi7VC8MZc=Qib+B3eqM2L$IeAUFz!VudgiLwwHJ zEY%HTS(@(DVz!>W!wb?wHSxi}E^B|O5i85RUZcgSwNq^C?QxTYtuqetxDd@A;&N{F zGnw5PN#G#fQR@+-mF%-bLRVh-AqYRdpdNwuDtp?ZO-{e6J9xcs0!Lw2B8G?q6U&q> z9bIf~u&z417HCLnbawOrS^$t&{yx?AMiCvuZ`Az?8&0`|(OJ+LJaI8o*Srp7U=2~G zJ>tsF6ORj%dr}9Y9`)bt6i%|rnmDm+l_c@U2;5y9(l0et1RZO}<;dtXlabXa>Sj|{ 
z3Aq=M!Z#J8tU-8_@EdEEH7$$BtFc7$At|T5Kn+!!*QKuMvq-30b<_}FglbSPvCd%Y zccbxsdEtu5f!@(Obhh{DXI&Qs02uGeJJfYHvA5dCeBP1!j?u)B=;YR%W!hwsQe&^8 z0t{2X9D9C}?Ohn;IM7La^`$$uOu_JS3z#bddy-Gf`#1jja1WGUIl<)K8HUexyckyu zW}g%A$H0&ucN(FhwNfinM@e2osA%Nhi;%WN!O}k01GLDLHs9ay%7le@D8sZcW20Ws z#>e?bd)e@jCQ_M`0ixh({$70&uee(~AYS5;gyM^by(NQs@wrR=)!tyCK{DK)-WFem zCni9P^w(&6L@cj$6r7Hjw;5xn>%Uw+-*a_2rNA{LYV15>;C+2!J4WXHf&_$~iy zN!ymR-{a&RRA*V_juH}U!D`VdB!*mt;D?|husF?-u={d8$p0mcUzPVEWBfpCSdO!> zV-8gNyP@oz`1$z3&uotbXE|aq+yIvW_o&m}CyH#GKl0U2cVN>qw*`vP2Dc1kXxQQa z)hH`zNC~LM?$sc$>)r{zifBY`>M6wByu)-$nD~Tp#{J(+A73)!fE)6qUo4PM{(atj z-4053TOjaYP`ql0FG9-_^}kuzX0~lsG%Hm&w6`L%JD4j{F71OR4E>e&#KOI{kW%5b zHnX<8aDL{4*QH}zbZcbXmsdvegI**nxThcb_|?6I^kc_n3?rJU!hV3eR*FN|uA22b9h~$4jWK zJDaKG7ZYkUFsLVD?)33Kx>H$~m7mO13@1R+xMY$OYeQWyf6FrMk_N+U zuV>%k?Ilmzui-sp3w(ZPR~?2DgP+duOlqx#xx@M(GS7c%@BfZO1y%s85wDZ08S+d) zYVUuD0k>@JeWBr}U`kzdov@uD_%S$+DLn_ofE1CuZ2rV!_wBalR(&wbkDj+pN84V= zKi|a?fk_vmZ`7QHOl|xW1c+ zQ0oPP7fv{k1S239vMU`{2+ zjbNv%z5)3}4l>zY*Gn|TS=&dR@=rNDw6ZKV^%jF9~6faD*tDMaFSj=EMV zEk~tv>O^?I7IApS>x-TZEC^zjL4!b@*lldr7#PQy2;*2OF2wZUDq*j~b>MkyZEgoD zD&T|hLLG?+&HmgqO~XF0Yep2vsr|TsGx4AE7$}v0pT`jYNGes;*xeKy?Gb{1%U+`3 z>5DZB_tFb?)c1ng6(Y}(r-nra8@AJLA>dwfrro>e_zg!kY;-r)PCwg3 zIc||zeS3T|G3pReCXvIrisYQ481Xpq5e6zKOlEO$&GmHo^6IZ`C7(ar-ALos;z3xKuQq)u zg(B6N*k|Uc46;2wxP=amQ75Hw&(8TI;E;i42qP@o`L%z0R~qq)@&=FL`M>hFfo~Fd z(;V15Gy%o}IM{({ta9}Vx4g8LoaWwsLuz**Kfhq<7`C&eTj4xx(nts&rhwx4EJZVM zx$Dp-ASC%y0V86pAnY|%8*)RqrRp%zyIRpam!2;1XI3c+GIxDupKk9sZ2$b)Bv9ej zC}-}{svjPAwIkq-M?BHD)`-u&gg2jQUQb<3aO(6hk!h(4bRNr}a(_e4gG%b7H5bcq zQGP5uA89s_keyq$=>IOTPo5@=hNY-g_EodvvNDm@uttj@PO@Z1iE+f{z9V7Q)O%-{#g+Y~c7__4u#eNHY*KqHR&O zw!FyT-Pp-VK}O~Jwf|nB?k;B4QO}U5;i6l(3)V5iAuJ-G#U_Qbi=>I;Pc8QH!jtR| zo+6z2_|X@weI`1J1U>@~5& z>=pN$*ouGKmbIvxy9C!{l?QQ5&%8|eN%7YH`}6dromE2GY@Z7n;acXDV=^QyWtN=6 zXGUa{d0s_^r4KX8WoGrK4ds4GRg-eMDFbio{OYb6mlQjIHEru{@S$t#C6f|pzohuh z_xrp#ISCM5sHIup`x8HQIwa;83SX%BJ_rEz#nn 
zdqiI5Agn6IMTAu%SKc@Fg;Ut)R=S;7FHT!fP}XuLU6y>~8@px5|Z2DUpBoe&eP}I=#^G_UoOyL09#e&DbyaZoHeLf zX>!exTULDZCTF!KnfP_R@507AN}mq7CKMe;+RyW~gPk$i&#ja28A&QS9G)<-LRhu` z8)5#7>=Bn60FNz#FRE-mFtf68N#(~9VZ)mTFFa%Z)B8NQ&F5n7Vu`}?swrpz1u1j@=ftaeTCcQu>Nda05rkd8|&3MPp>#l4Zk0{F(d)cchUn`%gWrN0#yFQ_Na~ z)Y-g4^SYlT+qTqNS{k>=2RZb_Quour=ph|!9hpKC^rK|%)z0s}{RvZOH0-2+lpw!< zDL4>Az=2d!X;>Qs2Ik1Eg?7Zc@`7VR1;h}5_=S^hf9kl8elaCxc2D$nTdJqA#Y+HB z+rxZVv7j%<_9$1Se{)H@zyuz^AxxI9;UFbSQ(5rt&dk$}`Y99J_6L}+;`ilqrRolQ7N191CU?9f<`d~C-CPtUlMk9?=xpq6$L-hDVw1Lg-y=6QeJ!Zo#cc#%Y!F^`m_>dxrvSq3bKBBkD6Kx zaw^vlg6-pSf9>6)iy9|#Q}beUHob&gMc>41ynge`HhmGFhHm7M=1$GT&s@FZ3T@g) zt|oPsZ&lo#Saq!QS*89oQqI!7BQtU&ktx;xq*d*8^ZTM>H*XVlmAp?W>{1B)6NBD z3oK-atG44sJf(1t@?Zc1{O6N_Yc7%^hD3gGyPzz26%@6!xsh8~6K#+D z6Z`=f3u#k~?Z3%%wgT{@{)+$H3~Tlh$j#s^Vw|$MyR!u|wquO%=gd+5=$ntD9i455 z3g&+CM?_nEx{B)5EnEHe<#W`fGgBA1ZD@*4vscxwffw*-9&yZSrbmzqL7b zC|9yNFtj1@219IUC%PR8JxMlK9vCkz(r3^nc_ zcY`2xU_W4ZdX51RQxlHQ_{LEaklp3pA1suGn ziy_gQad+Fm-|UX~o6)jQQ8;;ALT5g{(>HiNn{Cq|?3{}ue(n2{HK9^y?reRoI(@z+ z2@iQmd>hZ1oVq#!=bsC^ie5EM{EK~h9YfkTIcN_Tf3`T&PMAP9)k4N{US zDIlE^(jZ;Z5(3g8(xUS1=P>U(zQ2$8!?|#-(LdJgXYaM{bw>v3k37gmY(SeHDgx6( znJ;zfKSCK#OZ-xTz8Sx9q#FAwHKm_ezXVi4yx0GeE$7~W2|#=3OjLKW>3YriiWfgVBhC`N zfRD)cVqSAWC3XeYLeuYQa}E(fEclN2%*76QSE)G773B<&X7wr~f$vjh9u&x#AD|2x zFPquGU11=~P#kSQ#Q_4%uS&$_R}*xH5nOUe<()nl^dZoO zbClw04q6vQiByGa$z*Q6j75{R4g(`Yzc+>3UDK8CvH6UVv4B|rG4uM(WivxWU@@yw zKR_wKz!}eZJz$(c`mHK13!yeu$$t8bZSW>$!hv@9v?;vTLF!r^#iD9uabT8pHFxo3|M$yH~NkH$uC>GoYKDfm(I zxvoDnJ=Z#$x8ShC!nW*D=ak1WX_r5Hi89lpu9UvPs;2Ix_WPHedW|C~PqJdPoP>(Y zM1?y-*Pm-%#seM=$$We8LXXqaf6~kUVC~-YFFC()z<{iI&owT#wy{KkX5A9@7)ZUu zRTR)nivN4R^MgMtBzVsKu4oRV&%nVKxO=-lwlfn07HHt+0zv^7cNjR22s;y?u(ySX z0a?Hy5wO6;p>g#3GvXgRge8`3)F@jJ;Wr*VO&B8Sj9u6s_x)0o1)hnHYQtKdN5cvK zb(#$`s%SIGFX@>srTjT>)xER&8L){62Z}l}hLvUnG6uG85ADKv6550Tm5#|uxLwpcWMqb_1Z0|kM>7LWh8?Aus8Q`517 zkw-X9Jq+4@ov|YFB)oD0zvovTGQ5p>?4h>0eal<==M*#32hR(}8YvK(29Ye!o!l*L 
zfSav3WRKxwYHDN(@gktP^sha3EIr7@+_e(`gF`M;f7`Cj?=Cyv%48imN+(mq1~}CYBB9}VLjL)sn|cGjO)kJDxw`h#Fp7l zw(?1@AyhoYz=4I_fIeL++bTPADEGZuVuSR=qI*V&{7B8cJ=4#sl1vWVR~+5OfnS5A zeoM7D4O}@67Zrui)1G@Tx8jz|Onm#RQf}1j&D3bI%qK?nhbAjbimhZ9-&nDYE!Iw~ zFXhy_uG9{uGGFd)v|mBo-6mr-53Taa_oW0t(DuVd;ueF~4Y)_NU#p&BIR3_(bxGg= zm4ntE=PIayyPl@Yyp68mri=YVL?tWF>$E}#Ha@rvJan$o+zK-+55dD~*-qLZl)|b-A zk$~{Y1J{*m67qesS~z=*aJb}npOpZ0TjSG--M6ZHAE$pSUe|B=R!gR%5S#0V?nmqwne$0(YU~NWW7N48R`wCKnq63I*-C z&sFar5jH?QZ3TRLEJ2|6|APg|t1&uyMr;kv@mER=X&+=>0xtNtbLD^?vE?(e0=WP` zg{A=>Y*Izh%l&{J;qd(TfqpXTIep!DidUlDTPC!}?LZzeT?wnQFTs}bmIHtK=gl`n zNLIn%JTD-t^Y27k+=36Tn=xdf=v$aoVC8K7Oj0{jbDvAgo{6w&j zG_*}Ms22LQamHP^yZ24ki$VyA_*nMF%oErc6ZX7f^VN8ob8 z*yiuyfEycERc~8}H@Qmlmc5kwIB!kAU8zqtLDtqSX_Grv1!6C`VlE+Mjw_7hLtfJpz*7ep7Em~k>op5uBnqU3fcRB< zLQ+mLK>$R^t0`@h6;yFteW0f}h}nk;GNK&{L;w&0-b#PqWw}nUu}7fI9SHG_{`0Cv z0tW}kI@%JB0&zm_4z?&XsNanQAgg$^<^j2m+!|~&y0#=bi=>w}XqnO&1XX8(Zn zg`*Y97tG`Ep%s;p|p zPQ31SGo6VjonQKxHuGi0OXN%ia)e}Cl!=y@B$0m*zel7$vhHQ%aJ_CtC z`%QOYj1$YZfi&G|J`Gyy;V|)7rjJJ*hBdF6(-Vgl;kdw>ONKxZn7ny=+G z$zGHjz!B?;^H9_{@(b$)jDqjk8$2~PfcXyEw`1?D~QZJleFECwnB zw^fh_&KHmt12hE|_Mi@O|2c-q-FlhT;ZV#+l`e8M`F2L zs)>FyN1TMQ1&rTV?MgrY%x;VHv54~t^phk9k7i;pNI8#I;7N^}Dpm_o&H0!KZ~D@R z9r%VvR=?rvQ+jX0m_W%fqHfFd-sEoe9K{UM&(qFhWJ7&wOw`ru{x-y9Q@E3vg6{@` zecPzVxC2dEGUGSrrg10 zRiR++cHwK_p6xj`n6I$N=mG8Vvw-d| z)y^j|7w%cEy`p@_gW+pLXc&WQc{OeF^mN+SPpja$zBfjsOpqZc3M3^w_fC2OcXfAm z0PJ3n9c~0f7eL7RkFfV&Uc-0*)^~xLNE|MZ_NI66hyw7$`v*PmZqT2_99p}s%c4% zX1$-3t@xkha0Pcaec-g#Pd2mxBAM1zG76IG*J_$7fAez(3Qc(-Wl&~WTeXU6G0Y$R zP8z;&1YYM@y1>-4ulXy4a;#eDlW}?3)H`|aMYk|+HJ7mKaUzO7P0zu;F%62er2dJH zwCo3wmR=VH!^1;BX&|ECxdHncWYQ9-cNFdmh!W13*PH8|cF%dI2Lv)(f?F+0EvzRIsdR z_h6y7V~*J|*T>=)sRk~s<;vJxV4jJ({njr#cK(Jnt6ssRtSIAKFRTHK2@*W!wmQK~ z_nG|=y%eF%>zI`)UN#=eV#6*tCA_IBPu|@icYOC+Ud!!vX{bo0FpRX$7{RG~KcCNVjCbNl@lSRIMlUQ?&pj0J?k7633Z7;ELi2NZ@zi`7FxiR> zBYZ%Os6dU*#X|vHY;Pw8b&^&Hu=)WOhUQ}68UmbUs7Srw_PNs0mcJ;gCAg_Os_!;O z_$68^g+X;nJy_5$yZ!=E%k`IdQLbiEdVhXD{N;X#Ql!r8> 
zO-8gd+tUVQ-}G>?cKWkqd~%xJCf|yVqOhJUs8pDCLb^@Pzpbns*3(0B(bIId!Ek$9 zT15pgn4KSxHXseh_zii|#|57)a$?`#CYf;D6TJxhy&i?)LZ(f77s<5e@>(|wfj>%> z(7RneQ<|v9ph!>~L``*0r!6n9C=U~tl~F_oN7O@CvOk*ax384KpH}OWEH=!a00kITKJ<;BsB=UCqpD@k z4`Ni6MqBJ=0piDN$W><-AS&j-P(bfo)EK z`&|hs`7!MEXj|T^x8L}g;>{Bf*2KHtWA?wWbL9;t=kTWS=U8+8vP zJ1dL6`MlG0wDw!0n4-o}eZv0`Iuc%K2aq65}jOVfa=Z5+*hOt8*#clr8@;Pv)aXTM7HB zpDLLXyH;&mKqY@4M~CTW(Ba3@62Fma@!*W@YyJ+Y+eElWHJMpHb5tOtt2cEjlS^}O zQjorGyaz3~Ecf9~SVSkMMn_W3WiyR-vvE&CUx32=r-BZUJ%Ix)D2S<@E331e1FHkV z!5la@p?6tL;QhZ6P@&a{@=1L({Bn8Ri}UJja(We$SJ`<*#g-FvY$Lq69s6@Fu;a4d z$GfZRcRXEU^!%@JIZNXhI|*r!=i;Qd6whRM6;UKo6_mY{V_pu9|Xnd9nI z&wX`m6E)f67vZ^sOa`=-9lo9*lmy~8W_^J|NxJX*bDs!FmU}d6@^n0o8AVR<1bXoE zh$(zn4C+u^6npUzGCBu_}u;O|t#A zfJH(H*`P#|Hk#eqqNFy0GWMn)17F5a+^}_UZ2nXH1P~(tvbmT;OM81;8xYdyawdV2 zg3_(+K%Wh2_4khEHu&4lH~R%;1z3w4_zPr^Cg>L3TpoPDcIZYuE=tMH?>0&MFz7<* z-dyM+NOGC*+Eo`P#=h8}$Q|=RG;(pwxGp* zyPNjC3u=aC!05tILPa3uLmQ1#8LH?{B@UiI(M5$+aU!ap7OBi=ObO znHo&Uu_z(;6T%4kI1^%&b|}o|CVw?>B2^)=*^e~Hf_IQsTP?tf**3!I=K^H)tLwW* zxUe(xknuN6gucptydy#uoJp{aKEFu^BoZ4Fd*HBb2Qzgs0l?H5_w8RUVDtE+;vIpS zA{SdVC_{PKciRa7`)-IBQ=HZeFX#pEN^aOsyH0Z&uB zyy>LLCsg=`@UjN8sco;xkegd%0dmT9WX$Nx$i zg$qIqKu&-E223-2P^4ATxPoV{4Y;;`;$~P{w#|DE`N0aqn)S))mALfq+$VRN%OJ4OO0QdH7o1pVmOh ziZ0jmo3c#7BQx>|#fRStbXelqi5^)LR?XwCph9=4NZP%fEcF;34hTvLPOJdl$4~wX zIJg%+dVOD^fOsF2k))>&zBc@7v@>%09+3?y#03t{xy(1{inp|aLGBH#W@w*dBRgPi zh+gU$**gI1S%k+w_#ALlgN7UNk^G~oM)I(@Ip;+hR%2T^W4fm@Ba&da8KoNmH-Eq8 zZ%TxP>&(Z8USG6>`uO`n;-#J{Mtk&hp!Jpe#x673ucC@m(rc|}Pv(ZnFHDHoE@~bI zEWWVBuB}-N%B?Y;D0DVzp)hn(;r-Q2G2$v)9{28>PKYJ&Oc4O6i_(PPHu&fw^-5{S zieo5i$_6}0n3UA`tOmOtSL*cTWJ|u^=XX)dKIEcW5cl1P+`pQIKod1LQm0waN=ez``g7r5CCkw-Gk_Rlak{@@`^Q(ga~(}0HM zbD=vm$lt*faA#mLk5$o&E(NezxB~f%W_?B^SIWAw=rmp!&ye38mQJmd{qh)u2F_um z+>FFPxcV&mzh3O$u0PC=q+?+Y;P(xPw(2G45GSuP5MX)g>BQCcwdvZ-w8)GYX5cHj z=XUtT0hD4D@A4@IcZDwJP8oK8+_*P#@I&_!D;8Jc6=b6QZ(apU;bfl3%MH)7HA^cr zkg-MY4_O}2x(uO%&@G$(v 
zYXm%n`a0kNX8>mOu>X_C*b#AU)D3Odw$`0mos8nBzcaC4Bf`{0^33++qo)NS7st{?&B(uK(1#<&mi-obsK13r4lOGT<5Bpz(Q5#zs;&i`HN~PHDi@e;xiBeCSYt z&K=9)fP#YnS-?P$=meApfJ5hCZ-jI=J!3ljyPm!^<%sb*<0-czn5-J3h2Kg9-QQyo zHVL1UW$FGA2`pcqL8!$YU=AEp43gAJvB@qf*=W^1!Aklq^(lh*1gjpFK~Eh)N-9C$ zM~iz?QzM=6z+da#BF=dTev$i}t?a3@`(p^{e5=pw%zQpM%WKfAQn)db14q0 zkF~TACgx6-FbP#7WnkB$X6|SN26dDz`u>CFgMaC7+@^xhwrh+dbu-yAWE`jf$>EHU z8PPg{roV-Pyv~`!-&+ekDOK%#@+Hr$@bDY8$w%2Xr?F}`zZwf}5(sTfN-ek7dm|2A zb*1jEFEtXomNpTqnK!3JyTnH8hY*lb3<~YYcglX#xVJ0%V=WEn@fNW}b;W)O7cM+$ z<6?t5s056lM$LBeURnz98pM;PcjU#VjHTba1AA%^>mS78p;ze5VTTpN5QYgcmX5bR z7kbyN`hTR^kjZJHTD?(EU0&rUnZtI_()Pj!M*Yhak3DHFyKA=o*{knUcn~&NRv*Ox zY6fZ3&m~oY?KKi^Y6H5C$LiptQ+EQtIm0j>L)$N4J7T7X!}#{m%jqxAz07YvQoqf_ zp{9}XK@>J5$NR2;yLh=Uew>x+Lq!*c5V|3tV(Mp&S^z5Wn({a>0#V&qXkcx<6n@ zzqhWti!Y&7!SA3kRjQFkEK&6Nw)^Z&?ecpndnHup7$vz^gwWHvq4f<#w3|v8VCO`d zECrrc9KngXOH#=v>=*0^0Vx0fQYx$L$9%)EKyjeA;9NC=860U1w?o}O8U5!~Vrk0kg(-bp9sm9&lm)>?M4 z$XyDy-D_zj>!RA_Os$xrl72BB%z`B82wQG-KzrRv@n!S&3v#h0~5zI7^4(7?_Spp zkEpcLh)G$nTZR%Hp#%UO`57HPd^(RM0WysBYNNICM8)EolDE;U9(>Jz9QZ{V>V-BX z?r4%yUe35act_2z;BJW3GXjbWn{Vhxar^um8kZ26LCv;<0-|dbhQ>`>o*Ef+c1{@=qsQA=X0Mo zKy7of0Z<`$CxIdX2shl!;WoAiKz;^2tQ~+0#Rv1P^rk)+A>IQ?#{e(#vdSH?^y?&# z3kUM|LgEH*ek#==Z0bn-3f86?t&5D$h!uMv32e-=`(}~|H7!@RFVS<=@I*Dg%U@}a z6x9L$!18N(VXKe4|CnmBZe>Ne{Sesa{hpMr*P0Xp^dc7J4;?mawF~6U*-F)m0zY_Z zha)BRgst<7+u&mrPkG2EQLu|TS<0?=3+|CTV@WpZ)MuE6Rm^#-gwytsQ#0@m|)C$BMC5x@m!fcTFJ%0)jWTI>!f|%*& zJM-Es1JGEnxZV(O=1w{BxODmZp=nT4Mkr21yJh4~6f=u790;hvP>oP2xz-sQ6u{RT zQ6)%gRQ-9=kT1{sX6noNXy;z{ox7r4xnC3~PVx;M<(!!~u1NVW3{;p9s|v|{faOVy z$k8uandK-uY^dBI^|BrN$XOB8b``=cj#YpEs9RdGf5lE%CYboZKX`^cX+m7eN`WdJ z`wQizqn)NyBMh5McI;c*znz<(?L0U5h}p6hUG~qncP{GeU}XYFfxJbSBS_{2?gSux z5$S+(`Dc_yE6f33p}1fZ^JTzmaEzXZV|JF{UCGS@x-qy%Gi07+8*hImr(0bYt)%qQ z4`#sRxIr*S7hoOeC@6|Q`kA@VAI!Sdh?v0*}XaihX%`{b&mgjz|4NZPb%<;q*L3a(Axg4%20-PH0S zjo{M@^v0N>{Sx;?XdkTJ<8hsS z5)}S=)o?GiL>X)kc$tgh+~FXj56C0$z2T8WUHllCrQ;eu`{NLRtV%{&ZJ?3VlnlQE 
z5$2F=Q0RA?yIzY+pm*$EYGA7M$0i!cDE^zs0sQE!t>I=sAqBSs`QMI^iuyFtPEVbT- zxG%*>axrhb;Q=^3HU*RN+!!0TUnHPtZGCVIWb(i!P0yX2_g750sc})%3#Bet?QoW8C_Uo0_U~Y{n z%IJWgXzC|qSYSa+yUD!WcT!3@xe#yr#&^qoOqw~#`syzGr@Z6`!jVjqy7;+)H&0|_ zbGamFroJY;?ll?lJH|9y^SJMQJ@ugaOI9<#uG^qFK3BczXH6+7E04G0^XPuw)t3lPs?dZACt*h!oJhBbY-004NVcOs_c6c0U$tz zK?F$O1;>+5;jf#>&3t(sM!EK7fq{l(Pilh;I5xOGWBxlHhU z(f&+Ap**}&eihIlJv+TMBJ^HSyD+^POtq!<>VHkLVFhTAwp%S25Dl`WX_v^SiBpK| zSHllr3epSZo_(+m83qNy1O+;m2WjqX3#6I|6H{Az6rgPbY#AEWfGiA*YKl_>Dt#PJ z7kTX{n7ML^-L8TUY}{E-6?13S6?%eX?lYv6Kp>ei)j$CDfGdWX-l^zj&P-ouujSTFd=`(#43 zy6g=%+n;7sL`r%UbS`M(>qnm*Be@(Ph>)alMO#3d|J90D?v z5)mpKp^5lxkIH%8k=G=+X3($1*&63RVBW&yegs!-ugm0|f-OAhyx!O8Um9?W^ODuK znj5#9-U(mISKgnh&_}Y|zK2=KRjGUo9H8R3Gmgtp+xGIm#kWcJk%O&zS?u zZefc|Kf^Ba&=OKw_Urv=*`W|=#O?Ye`}Lp??0d9Q%Xazf48vjI2(g1I2|hRnTyQlt zcLED$XhC5McQ?4z?0|6xM85?auL@m+W$;U=4}e9$xH~};*LWMu1!{E#;!%8ativ`M2f1OeL&qr zuj#*h)Ah{Y(WBh{&R!`to+~fcZ%>B!bP33U={*{Gd|tq4em%STz12Z#?-2i?;?X8C zSntd|PqG4agF4D{84PN0aD^)%oIK#>2w)R!=c4=%Y3P5BmW5nW$#lDnC+=M=u%l1C zTR3zV7$gE7-WOjzw0(AW!;{m~L$G~*!F<#gIroM%F+*&lZci@8KIfvyaGEQTi09R( zp7Scyf%Kv{QZ)5G!wkMuc#Pvr8JgB*LkALCh2#8c%Qg3G^k(jVio0^ zj9wa7%Ees^peWESLufl1IM%Xt<#AUsZR%1?Y1_PKlTT)He+_%OYKD({Qx^Bqm(`Tt zNp)=S>e9Exo4=!X`73@%K7UGR_-?@ypK54BWq~NL>ZfVP3$4@BheuNL1v8hH?P**> z?t|(;%dc~9Zm=N)VH}_;ASwK>pV6P*UkneNlsT~e3g5wqzjXVlG_?M*9})x%k6`nf zWLWRXvp>L`VY-#L<*Eg|?Nc4YWp*rTJ}uX7>^oWP*hv$$zX6Y*fVsH1hS^UlJIp3G z-`AFC%|)}lYcfx5LK^jcKOcy$uh2`*sLpIwAv{)Yjd8P0NU6u|6>#Ob721UjQhQ}Y zB6;_51XszOv}0DmYG<7e_Byf6pKKSCX=Bsym#h2KeEGU>2}Mg>SNR@#e0%Qr#wqyX z961P*!R>#BfnzexbM$NxINb8$I-cS5GR`5nWkw#OYXYovx-fuh&ZEHV@_{vzoM0>)o2*(|wdPhUZ{U%O2TcIE+~ z&*sL6$q=B*pLue9jJ&U-+5=AGpQ&&LZHv2m;WbsTVO1Y+DyVtmhESo(u3!fMJUKzyhhz9JA3Bc}{=mI+jXXO?mTsKb)1~~_{ zuipI+iT~(P0s5u}f0#8W5#&N}t}4*P31w;nvGE`u9!-P?^sfDFIZAk(+pkh0asBB_ z?Mz2ArI0skH>xjap~rq+Iz+LbYu{%J$y%b^P zbwHDTmGQOEZP_*HET&yfHr+g7SN{a1Z_dw)a611{&9pA zk4B+b!txs#9b4-hR0WOg@IjtLDQcQWiB0X-N5j)^-U5`t9HfuneF^hg^X8$u;g=)@ 
zkFUo8mz;oT1~B$#a<*PO8~X)6!|w(pubrOmOEuSb0jG+Omx2NB7riq*_j_^%Bz_x9 zm?GG;pY@7jN}P(EO3=GXkFzz*8qi;rjZLH;6 zS0O2@63W9DuB)e^m&p9{mA%(-7o}o6JIz@vVi!DkA(gP*?z^R4_P{WOq~Q1pUMj1Y zi%%KaBj*o>14sk<%iM~qF3j*hIsnLt^jx0?D_}%mXNKZrXXl4m@R&je>EGr1Len@_ zk3eZrJ1^PY4(+q$yZt~dw0s9i!su|W->VBNwt=>_Pn{SBTLCG(8 z>S&GFdi%oNr=5a=yt1@it}7L)llXyR%ad=F_bR7Py6TAg;)}09klNbRS97IqU)=1$ zz|aC`m%R|kZSbq@`YVe@)<+jX__Q5i8T}LO{|O-3IKaba@{b0`hzt~jiSeArUP)S9 zO$LUh*eg5PL*tn(z-6Jf5!^gzH-X-;g-pl#tqYpB`RgF~+kP2U(d$7(l4fHM@*{bE1K*5dyA zR{V91k&lkKN#Vo_6J0p@`*i7|J<4|!KyH~`Vz!OQia&KL|ZYz!&z>s-l;v%M`4O``wF>g-?+(h>iX zOadJ@E5v;H|0yJ)SMXfwr#0LbBzT>f-b2{#*>nfYEC2xnjT9_k3RXTx7DjpTWRs~K zE3sEYYvrmH;WL4=2m&%51=C$I&y!`+>dWAIJaw;PzVCTurp@yV7V?sz}I0nF}KDc?xDCYQAAt@fRKw#RLuA zPC%;V8{*OR41BIzVBPkxRk&ZHND!9?O?Cw!s1sg^PA&lqK57`Vyc-)?&x)H~JL-+w z71#16kX%yOglR_&LA$Sn0pGgnOax+00Qh^S4Z+|A#Jm6sLqmPeIRx07oek^>+}s)d zzqdQI&Iga`Nss6Nst8qgpL14%CUO~Aac_he>&)zyN-pSGu`G}@~3 zrtu~2La)vFYv0*>ai2)Nj@H;90T}?bA@z5TMWxxkNq!>W6NKAl6sT8NO(AQ&mcBUs z<2inUO4%GG#b?J!=(POY%_l-dHk}0kD1Ff}T%3zody6-J}w>Ks0K?>*+LgdZai82T43fx+c)kp0OE-1XD3Us2!3DjL~d zcTd0I1C@ZBF3-K90Lc@Gxj~RFOi_^&-3I&>i_pm*vAZnhGQ2hT8RB^UBLq$HX@UC3 ziy%Y_VB9mWMGAqOyC#mA_Zm_KGb|$ex4u<_Pl>xrs1YkA;rtVmTea0ILCnz{zGm0y ztLFrYoM;v6v{Ld_Ni?Qn6^>IbEC&Yz*xv<${rm6RIsRBBSg$CF%YDkcE=lM#N97u~ zs%MbE>Z(eF8vf3;CO$ajx-GY^s?7o?_mMZSpQvi?re4A&xZ>eNswe(%;QkrzuXhCD z{<5GXZjNNEO)Wo8wV+wf)VNA%Lf78}l!P9X`zJ1VsaWN&1L}6Y+_f0W3v3w2zxrGZ=S-4iJDs` z4?$|cM_z@X~ggW%_0>2B~G0L*mmwu^2(I&J_ij!9j43Ey4{NLKGi^m7A(L-0YvN zn?0Hh`)_LXi;lbe?X`w)LNiaS*{>I4ad2Ezf${>Cm9(UgSuDxlVwRph+&$hy0cr6l zjsFd*U49*Az5Z$aWwtA&O>KPj4`UK)#}b57UHy7zh@2xS0V{7FY)_Kpxj)6q<1>B6 z|14WD`=(fSs-p8ULCxoWjGMPPuG96a)d=@^JsXyy%wfAr0dMrtOs?To*R(MZsAAP8 zSP3F|^uaOPWu#Iwcs_C`KB0?&G8c1VKPbET3%JiLL6emI}e?Bwl&jlrf z7MJ{d=Uhe>2oJFRHg_@ud6ec(4(2XE6mRPeHtwJ~H#cL2e2mb`OFal%A<^oDAycu2 zr23j3%I;dxq4;PWZ?fjsU+9?E&%BWN8|&_~CoKKrTp;k2l=!95uW0GgJMAo(HzREl z1*r(WllzaBzk1)BNz3?Jn0SKE?6nrNqR#A4uK8ZDbEkFi z*5jbG(xlO08(4I2VuT^my2L>G^1GGiO~AM# 
z5r^gG*Y>{I?Nj!zRU8-}yf3g*qsff`ojZTNvw)kLo7mgi0BNHY2=|7G0ZSRc+JO2| z4hWf(I78+%(;w*8x~&;1$J<`6pc6}1gBB_=dr=w#B%fXpQi?QooIY%z{-^}j;eNjC zCT51Em}D0s^9ySE#gtO3I6kVawL9#6r(8a6D|^?Z*rD*(B z0k7;>km{COWDrgXdt&gIzAL!b*?vBfqc#CLZ|S>D5xel`4|M$=ty*hOtQslKmdnMfAg>g)v2t*(1P`smx-AKXUe zrR=6tR8}~)h*9x6jSrQr zaU?^@_1a7YFvURQ9Zm$=uxSH$%guUj1IxYEC9-x0;{f+``dc8l)2O!e_2Jdm)HHJw zfzN+@*W6;zEH;&X{^}ZbnKYa8%TNotBgtUWK{AcxynA{Yh=Oeaz7$J_YePwZEv1+c z=F&)iEYMM`s8K0t_cRwU<8`L;Xktf@!_xk;uCCorKTLz+F21|r3k>#D3IuC$GR|mBljMZW)Y2P_IyB?6ToB55_rD0wFsTlq5kVDdjNo`?UX$boI!s@nZ_oi~#6k zBLS5;7dCMNN;Jp?708lNrZ6QtG00sN-2+&I7F2Roqr6~YP|2iw<^W^b%&5JR1&?w{ zI>Ya(lYd%9$9s9bPn`Y}o-IO4PhZ1~_$smNfA{Dv1&xkh&{kqYnKz`&y(kOs>)}2A z+;Z}Z7asQB*pf!L#MKB29@BJUb`{+)b6%|bhDvjSmET-LbqjGtN2&HynI>hk_E$?* z4OFg@SdUOCUrAZ7A*Vousp1N7ixt_-sK>ozlE_}Eu{(9@pi~TCCw(+Jvjw_ODg0sI zD*-ddWy}klL)gB+;u$2n(`z@5_(H)6BOuxRf0Z8>YoA?on-BQ~Y6D>d=kAxlHozTe z4?{XT0eT%5s|m^yoeJcP{;rhK!9d~-={Cri_yvXSPr`4^C2n4Y;O>Ls$f@fsqw2i(MJ+$@}q zsKF9t1%{!XyU)vq7sr>!DR5iZVD-EXR1=B_4Q$Dro$=5D#I!)||+N5~Cyq3*r zAVZ9j3bc#*ocOp4KhX94Ps4+HYBe!k#+$RF(=e)}M%z~h8GbbnOJ})rSoPD0RR&^e zlkWWFdZx@CgBQ`6)Qo$%`~lO~KwQ{w4BNb%t3|bcIKDP~eJu15lMogOOySVFp>qcq zz=Q>y3CJE6rUqJTpg%+VS^n)0H-2?@0U0r{mEUy57>~U)5FJ@gUT*<2NzB68}x3yj_z))DK`Tr#|Q#q)=7uzZSv3rkPQC-F%6U{*5~YBOi$ zk$mg$A;~rUsv7l?GO+f>l8~G+t)V(_|M(4}?(az{Bl9H*bhYwY$>S;u3TrD@WuKOt z>vd-5ye(P>CL^xvaujOXK^z2+;A~?XT8l`)m$-r>_<`%&AQ9kdG61e-=9j>04$jcp z8+RW-1hvPf+faH95^yzpWbxj~6`?kH9>TbP+GH`0X_ux?xs=+EkZXN^rv zTd^LXr()o(jQ$>JEDhyuunmTukl4|y4?x&rsjw}a zRQs-UkL>3}D)L5Ew8%eTlJ({5b6Zfr0j#sd3@@$yv|%&SPb=#=SRs|P1z!G-2me36 zGlZ#&l@k(XiA1u3GXw}X;Ee~wn}2q1|MDmrO?@x!0E7T%f$TnT*3E>5GvK9U^M7<8 z2}KloAXw5ODI5e#rU)u6jA~ineW4&Xon!OkG_D)hRuy;NDKo{c{ka&7&;S>a#YH^T z*y4T}$_kR2cn#LaBj53X3Is4S<=hYJ6ov~e^@pp?C`QG`Mu)hC^P0*_W7aUJu7qO_ z(hyluD9e_&n}vvs3Ii30M6us7hS^V7nD0IKk3<$e*21cM7>M${B8D&=R zw!7Io*{~{r?+uFm`)h&H$?uxNJ5&0W`1Vcn@a*7JZ3QUI6u*)7 zi*Fc;Evun58Y}m<7;#;KW;hErofMEtE0247ILGwSjx4^IV6`uEdX-D7G0t^L 
zxII3NM`Pz>JfODviL*k`6xXrY2Gmw#G~i+LmWvz(6=Mb!JJ*z0PE=7IpaWoa0$BSJ zK$LH1X=`pK4dxbLz63Rpf147U*58>Lp>iXe?%8C6kd*wTlJ(;jY4)F(M3J;A+3Y1RQbqAw-`^X@yi^uS zENdrTAN@l^-E~S^BTiK=<6Hez&7hvU7j#uWqPp4Nd-w_Lt5w1W_zfxwo%S^|Tx*2+ zhi_@|Dc>JX>f0>0cFXP~@#?WLv`cQg1_de~crtr4*0VZRCso*8Vq_U!T%o#iKz8wY zhf_#e3uX1@^LZDu zo868jQbfC%sJ=K~Dt5@;`=z!PiZ{quiN-VxEOBTPYd&rVyjv& zLAq_yT2RPd`Kn*$ZG~GXimK^5n9Rm!KjMC7G-oe!cH^CfqlJu8AFMv_NivE z8t5yDu_<^GdmI+)#C4p$!1IgMST)ma^hm0L@Hx1GxeJ(2Cs9e2C+`v#mnmBU0wU9| zi7}dMb$_&bVlVii$}OF=vf_8p7bihG=DkYbA+IMw7~n$vCh>t$i2d^f9z`wU2ZSOi z@^Kb&MGwgBO>S?>^GM-(;3>C|;x-_Bxv;cqPmLgt1Fz?RNZCd9A`g@VT7;jgb+mVwDuXWalghO7CAh+RbLa1f)t`i0L1tp>?Ae1pAKNXeIHoi*VJmIa6P554p! z$bE?zEr3Xt{_@;Kg{N8v>le5_Zs9rLMcmk+1XczbO*nTGFgvq^9Ne!iNtE*{X(ULF zNbXwpu`Vad_?73>p{>ywzYLWxamh#K#aUS8si5lfClyyFxtWx$#SQoKqW9m_@~XK^ zY@L>B6Nb|DT==xHujx5ObIAXYyK!9U>qb|3#9qLl0vnF#b{CIFBdd=OP&sa1nYkt; zeR>M199b^|gjcZl#WPR{98idJGahUaD0dVb4wI5lhZfy`fl0kbk$3mWTd`}Td^hjL z4?O^;AiyNHc(D#`eF0|w!cWb^olA@l+HR!1=9R}}ZcWs%y)1B*D;)|yRCE~Jcv5x!SC&dZ!eGT{ zfn6INB4H9>{ z_l;Jyng_4NU-;Am$ZfK9g@~GKO`jv-&ZvlsK$Mpu_k5uC0RvGrmKDS+g|ZKmw@#k{ zo9p^MfPA+z1_9I?O}0I^dxRqa_5oJ?Fb5}lXFw@AOZxk_7Xq%)2URoL7j?aYs$^ zzRn0ELa{3)uC%7C^j;QcIByZ~hv$kTJC;6neK5M8=5__Ka~!>rgHy4ZWU-Q?sRNWIjJV##n%kOWX<)O8DQbAgI)jNmZMXJzIZmczQqVnRkaV6QVgrnj5F zQ@M2X=EcO3LnTrsVunM{-uZTlZSoJSYIJI?+7EV}7lA)cmC!{ouCiBN4haD#1%iIg zo#4^hduw}ODFbmtU^1%8aMLr7zJHfU{8`s7uh|WY_tv?14tisI0O-jeTVyXEHiEdYiz8PFjBPsqMtfCu*DN z`K^n4C{bIFL4HK*PBL(Bg zscg0B;xSnh$Ow2ZE}Fgvy? 
zO{UA8N!?r3io>ilh)y`0oBGstm+bN=y24vV0HQpl1h4VxNr`v9Z| zUEi8N7*!@AY@jB9&3`^k&BDY6m@$F8Dm&na2E9su?;CR}#k>d);;yH(-U+6AI)o4W zm~0|K!NP}7JA!-r^mK+sgbl(6CA0yFf8xTG=N$zQJ~;canX-n2bemBnSnbnu|G2m( zjvJT7q^rd$=se?x?jf^si87@VrNl$EU-d*(>3g3bYs?qh3f#l!C7*m#uKH>G;PdAL zCCMuBr^95q(RYVF?q()^WF~1SsQOr1Kv`y~g~JaNm1}x))mf{G%~|M$&-m^H{llLm z<7jBG3I}anILZFx|5z7(KJ*m?wSag-=MwmVssRdFaIr#2L%dHbz?B7|H&9V}f!h$E zG-|jMnEqCeM=A_pdcdr7DzW(EX$P5K+`jJ;rKp30d`>xhk?8KxCqQ!WGSKdFjJRLM za@(%WNkr<(iE^#WA|2(|EB6DA*0_x4xc5HYa}A-tQf*ow znAz{(_fKuo_-6(*V_;)gfa;7bdi8gw5}1+BcaG639AQr2zNTy5SdXhKHx zOhj!O@7L)9O()F_$k^k$d6+h+&VR4Npxz%chf_eQA;i#%kM6T;rRa&=%bkmSt$L&H zjU%&n=;DXYBp8cb3i^@yu)ny5XPIe4WWCy1_R`b`b`T3h40B@?)WPASv z)_yJWR~^jM23jw3w0i8E1c-6QvM4deE?oe=Zw8bv=&JuWTVdlDo~TU#+4(&T2BbRu z7Y1{-M>&}SYhNczb2b2iK`*s6WF-qTF#~R?u>ULc!v%lz+y)T|w?sMfal@qK&txNi z`GTN`85Qd|{y)aPJDlqNkDC-FWM_p!=CQX>_9i1c4i1jvI2?O6?Cgx}GAklmSs^2d zl5miboskMjD$nP8bXWK9aX;7dhb!)`xbC;lcz<5+*QiX1J7rD1r>S{rRI1Ei$kcYu zADr3~S3hZNH!jM7xrP&ce*Mo@5V3?ZBp6Nxx=9g%iaQfGX+J)?;%9e64&WU3OKfRw z_G|Q^fTgr4P!^f+(_x-pplHIZdoG+4i%zUd!SqSK90_Wbc$_#@kwZKs zyJBE4j9T4#+j}ZYFAGTti7Ur$4Pnj1;q>Q=0k39!5AbUGh+_*j)UAtcm%=W}os!QR zLPXKKZyB_KhrDsPzc0AbPeVEgM>QMSyd3^i~J(urf~r4y#utgYX?;$QOR#Lyk80R0zcG z8e~2Cc#=gp)`ML7V_%Mv)%Qr<2X^YWxu655Q5vVPt4c$ zq<$uSzwFU3ICbwF6p%w83qJnP`TujBK@px9H!tAXGwV@Yz*42tCEuLV?rx!8{R?8&6nAC?RP29-^X~4!Ml&>x= ze`w0l+iq&LP%4Qu8`gT4Ls`4P*dG%`|N&&Tvr#4jzaJ_zstMU7|1cb=N;o$2SgDC z1Q|ceb}~hgX-_XUa@(gGhXDk8Bj4(D_mT_*!HQb93 zOkj4ncmf*{9gcJpm$titP3BT8Rk4P(q;2a;$D^kX9ZTBPI znUaGh%;ET_5%gfqV#ctrV3iGDoVDq&zfr9uYB%$SPQfrb)PHhw3{jYn>X~$NrD3}> zS|q#7faU!2w;y^Qb|W_ilaKGX&Bf?GotaB+I#uSB*H#?EcJ)#O8Bo#=uoCls6yMtm zzQWm5LcaWyyrw#O1C+!Pd>H5-Fp&0yqw$Xoq2HhUjvI8q_`11bfObSgQ1tg-!@w7U zK;R~57XWU|zvbSP|Ftch;aKqgb$}s2Os|l-E8GFTKZN^Tc-z?C+rihY?(Di;ZL3o` zv@amU-e9TO7Z5_Zv%w>`#?1xVEU!?jNiRq+5v+`&gGB_?-5Gqy?DYz`q+dwe(PrR+ zHwAPuqNy^d?-v?AHhz3_y-F-q(%Z%mAksrrm~)WviVA~5t7*Z%#^b+~)T z-7|tTSM1o;+r%vR(rgJ$PjCTZQuaABJ3q6%y#__<+*)Ff9pdcFXkeZX!AJ*ps&H2) 
z3=%E0{|B@b{{+d+Ce7dVivREw7hpE60^r)PM1HS{*-ORb4kJDi--W04zWVJnRZ=!@ zI2Ge`YR(>+KG!#HruXjfqes(hk#wsw$=t>Pt*@AG(--)_ZoT z1&fayEQHP5G{SnzUp^|90TTfDctSq4l7-lvArPf%;w0g6zmI1nFC+HU+6Sc(@0Yz3 zRnm;@Rsw6^f z8edl~+inZdI7Dh6+;reM-3J0E0I;hpIeKby&U*(dTaGk{K_ zlg+UIi~$&>`G5yVZxu8K*b4@jwrZ4{7YT5EI>!yUKBa9}9S>y3Gzvx}Z?WdG-u4yB zr&I@aJq4r#G~+K!%zDU|$)B5=GrN*6SE+H8uF2^2OC4%fQWmyC#OjM@#Id%*i$1h58e6uzYKo?zeS=Axs2YK7TJJuZM5 zF?>}4d4#gX18Jw@&sP&c(0@Kf0piiVIlCJ$XWt<{iU|jq6Zod}z{1#LfWs;VVUL6h zxx>AE@X?k3LJ6!~+9BwKnHeIQ`aqcVdSVuFX&DH--!ICOw}Jw|5g>yRpaiZ2UFSn; zy-&V;)%kd1z?%@>=B*vSk0vP@KTyXWNwJb72|0H(_-4C!92L6Ep`a#Y{;m6E&^Vw3 zL2Q-ouKeuHU;!B&7Bnh+bxPp*@Rzr|5=PfI<;j za3SZob3UF5j*gp+i;8bP!EIDLI@1{6mBnBnXVgD%?eZaEAd92$b|-#U{B7^bk0kep zqKUmyvaMl&^7a4Q{JXV^H&dpuGwh3xi%)$N>ypKuE{`>|dPQn5+V^T|%~B zHe}$#dNL2-p1@lL;GU-{@D@jqV9e}aZe*f0_GWNc3^_B7`qMEE6mHZ>UneHQT-oe& zPZ91|3k@KO!@|?7OZ85zciQ)5o1RV1ySY-v4}7+=qWvkh$dx_Ldo#p`t_H+#`_t}T z+zMPIc3R^x@GiLc;U1qV=F_lg_fW-|4B1xU;K!0v#6ny!U;49g)r$66p#`SN?p<>| zI=Be&YtgKSfn$EDl#YhG)b~AIMb7Ug47e?tvD8S2X$U!y{b_IiTBU%s`fZ~OMb&TFik(jAxnbTZ;+LVJDtU{AdeYQ^ z{@x8G?eT@U51z$D>j8SmJ_c0$YU=Alq}+L}+0V=Rd#h$+j`n;zqZ}X^op+0*O#kVH zel&N+>fJ*bX$>oLcbNSJcH7%gxF1@t4_yk+KTH`m^O18{ zbGlQc1ukslQPX#>v9X@pJ}gZ7F0XT=?uyP=Od$If5d_RX+`twrz zCpy2PpGB;1&AD8>+oDAg`7YG)*=l7{8I!g<5rLs3q2uBsN0z@c4zR18J=#M(gRb8E z0ZR~%bi09}m1-pJJ>yhT{h;KlZSBNpds?UDAP(11n91LqkP`HwzU7fn#I^7l0>cHY zZV1=p$*HO6#QGdd8FC{I?$2*4eU)H$lQGvENf<$c5hYtA;9kFfFfNlVeK0lpt%5!HWXvC?HC&G!42qs{Cvr2cf{KI_91QD zz!7yF(_^S4*Fv$}X!4g*R!zsti(cpJlxn%E{saM7=R~h14paIzdIM=)I~SSfxu>qB zI!4d(MM_K%j>e-BIRasZqEFFIDuN~~y&40^$2*-V7g+@0WLI@=3-w@96NVCeyAf^?dyKK*=9y#SW7Phm`+)9)17T+@ zJOJ=nrDcS$K(zpabW{7hD1fYn{z6u-(y&uv_&216FL7V71Lg#%*96IT{l!wr)uX;r&8U)7hLX?oQ=e4_tg+kYL?%uO ztXnyvE{|~C;yNi*`>JW6tR!G)Gv9x~$}=}ZMPvUFwHwqf(Rca@+}Ranu^Q)mxUPKN z<>}C?Q7dIBrqb+`&=fwS&6W=VqxP)v@$OQcz)$fzv5c$7Yb`$-U1TV~smu^g(WK?<7dE9e4};6r1d0pyz_*zn0X0KGIzg?tsa3o~=fCbw%@d9++s_s+r%;MLa&6mgo;1c`vq4fz0H62OOK{< zvEp)4pI;~cIrnqMU%Qy2t%glTJswnsELdT4^E7T4zGzg=T+F&WM9+PDy~0u0WF#|L 
z`i7ou0Xga5 z=8kpcb%{*rczY;E(a;zm0qXB!r4JPfzQ?LEujmwQ4$Bs7&~81IKi3 zkmJTQ2somHk}n^13=GyWx?D`Xxt-(oyo&){^Cm70H}te6t!aTyvZ`BU05SU6uunXj z49AWh$0qBMb0yG6M8}5@NB;>7!-5K^0eHLQjDcvX!Kj>AGBax7eP>*N3td z<5F*L#Vvso@;{x(G?4B_#)dFs;2vMNF7*66>9n(-44zKryMky&eA@c@cn?Q0P==@Nkj~%VdsNOMI1gwf?#^)jPR6 zFG)fsS#|xCHMYacNI29%Oz<%+*U6%#fYY`2wumZ?^szMCIcz9PmT!QMp=FM zX3OH}TVwK>BN$#&9T)Fjm6||f>&`xXJCs|2Aerk?>BpwUxG?onBDp<6uUoMY{R4Wt z|GAU@>RP<8PH>Efu#mdG5zNTnP1*Z@rf)qjq&1Pvh_;)mqa5AdEZRt*8_6$OrjtI* z4LBeGu)HF!zxM=m#xX|+Z0}uZd3*crVmJ^&faEYO z`k6bUjt2RK)b}}e)N0Qds|=|<=N?r!!`JbGbGWJSJ5aJeTF`unJlb?wb}&7Y0^rlS zxDVgD*qL0Va9a&XB+9)p$-4OKIq(d#rBLq$;w>fX^3I2yyVTvxG^hHe_3O0q7Ehd7 zvn_C1bEV5Ck`H7DeX*dgd)hn4?|b8bT5QVHvBliPAMp^>1WZ>B_BurYxWpSJ-faGnGO( zT%vU+##~g)wa-L1kMRdqtl?@>8I1?|y$$MmX>D$)+f3ei+9Nzq$hiq5%UVGXJVL1R zq;*2X5Tt2Bw0#cwPFM&EfU$Cd{dhxG`sv3Se8%Ugyy0fgMJx+fR0{DWci^ES)DeIKroKk=71)r_{n1 zDwqsIm1wR7+}ync!z@Fl6xD){cknUII!Q2$$OOgsd#}M5>)AP@cmqQ=wkslury?ohGB-wxs)N_?|VWr$v9E*aC~JNuQtDB|^V+L}kH0dj5| z-=D=;Yin$gzrRPTG}w}CMWJM_+a3D0F_*u4=S~33L2CI&o}r<d+x4M6$Z&5do3< zz>T%Ny7?VsXrEw5HAb81lU?-*1@)R_j8SfDITZtVXZ56y=CeM+45R5~u*xt7x7GyL*~ zTq$mpTmOZIIV8z7!``Q`I;FCGChQ`@3s#eM>dFFylc;FOq)x~@T!Gzo*B4BYdmy)L zAL{Z3n8#L->oq$sbn#8vPw~aC*_y~b1Ny&^T~n>J3iEcueV``#_vK4{v3807Yd&5Q#THU?96MGtdZ{cYjc@>e1vzb({UxA7XV>JUt>QvEuFGpNbH{09@o74R0bkGl9uW=T z>v=eOzW&FPAKybZJkBbZ*`GY~Eaf%Rk)p0*By-8Y%gxITb3l@^v@)Ylk=jO`a|(U4 z=UdSmg%nilK$msjJuK>iVWNFG=NZ2XDLU!ZGVYg@j6{FFIA2rioab`YbNKC{qgPj> zWxQhiFJb!E0ze``Q2gmN0+Qj4@?l{X0@28wQ5fBm)R~a`0 zGM@3Vj{dQXAbFZ%F2M`NGc2ACaBr?4b2>s;ZAt0_?`^)o@fO^$cw~P~Ltxr#EpzFN zq-`rVd`M>-Odpj*g~k+`PJX?#P0>T%qDaxqNHLefEbjfWE*w5qm9D;z%p(`gMQ{Y!V&qa5tUt7*RY6vL&Kc zq`C|f6hMLglf|G1jL6Y;SOlPm3n36_Pq4DUwZDJN{WH6SB#bOB9+T zuZ&%j#62_4s3hdzGIMY96vPfReYP-(lkHJ5wvzk|`Bp)p^xa(Q)5L#b=H+u*r^+r?Py#qgi9Tt*-suETMVRf%Lel&Ixv-5z8;A&viRH{lv|= zt9q*L_Cf0UH2%y32Je~(tzap@m+9$GTb}lu*1eNpQya73)|l_5#X5lMKrDs>VK!G^ z@Q!kKhPw(G=^7eCRrw1h*Lf}O5kw*hl5nMyafV|m`?=o~tw?CfPPlK)2+39Z+7f== 
zi^W3DX9XwgeIDnW5YCZ#KI44-*4@SC=Wlw{g`Tg94kf!^*Aj>!r_VaHjY~(Pp37of ztVf18@-OJ<(B6~)ft@+UmI|+_C$cB+)uCz;mM%9xIiO=T`Dk zb76Y%iJh%VNDxNy*l~*ZAu1h=`pg#fv%CEh#g6!}G`)JZ!=iSlQ<)Gr4=j^&9Dhr` zcV!STG3kiCe;mF_A2IN2!5rAMs({3KtiO&w9OW!zd_l*?NMnDo{BEr(bJ*RG_OF`kpzl5 zp{$8r=3d;Wy1q`YtbHJ7mPQYva;8)3bD2Trh{eaIizWBIiUCI5VO6C}d%P^QmpeP3 zd64arImauWPO9)KM77O2sG|=|l_Qnv64T{W?<5|JVoMu~^mf~0gFtVA?F*6c@myLr z7Ov^Pjb`m83bw82C)US}29cebp1S}5j}tx0PRBm%t5npBu30}J#M(uL{|Dd!F3y8T zQSJ~s2#yw!6uMw&@;_@x+Gnj}(&LEnu=7pn%A&&PRZV^n{_{Xa5Kn~JfwJ15NpS^N zga}IpBG*}$-j-~xzHT**aX4zuqg?!){1cG*0|W!ECKaqb2sQUMU#qo_p9Y%zwRGP1 znacRLo;{xb#ngyIXS(e~p+0MeJRKV*YhHN1dXd69F7BdrDGCugmyb5k;5FC^E?z=6 zZ_T4+Uc^IA>%fZMa@sp)Fz0Yre`GTMpy(^cU&=vk7Qzt%Iw;;$GB$qNZF?^UG!dmj z6)W6TZlTb-5bDhjRLX1W8Nh%%00?zFg}~?g75T^QtSqNST-U9YxY9P^Tt-dYl1^#I zDd-B*YlTDB)I>f%Z{GefwcFQi{z8Q89nS-Xj9U(b5v{WgrEi0-?nZXsOz&wfMAL^$ zhtrywnMcW2zD!XH=US%jOBSb>vz#)CGDHQ(I8%uwr~B#p*W27h_NrUFmR=OgAIkB1 zUfXzCZaGe}VL*36qRhp``1yLyr!wQ<;;-n@pYuL4Ea!{_&$s7-Oq=!%rn~&)Hy|!q z(c?wa8zGw_KxUdbOHgwd;*zb;04`bS!rqlFz+Q>FV5LAPc05cP)C714A1u=Wfi`ZS zKlj9-fN_PJC(2()$w&>Rj6?$~K@6x+;vWL9Z7dOm->6pe`9_GtOKR(I;(k8xrSuPI zk_E@{sH@ty*G+qiFCkM{X$(o@Q4X~Q35Kg}pC3t>uQ;YZgVkw=%0224eCK}s?#Qjp z(f(@dY>kcq;@#eesvU2*rl=L45-nmJlqSun@DSda*ndJd( z60w3~M^}5RoMrRnPhZSBX%gDd72H@LjW9hu)L=-6jk5t`Sg+H`*FmL3 z!N(si%6|f7_Q0>}e?T&bRe$h-j&uV0aD)#;>%=1wV7v#eIaNGGfq_Q&3zSnPic8Zt z|K&WSt2^;(aQLDHZ)087KoAnpfE6z*JhY1kIj$A$VnRKKK#uELFI=IGmZg&eouEo< zcN~~l?JBV(@hWTa)h4^#-OPI6YVSIc;_Z~#5S;p2D#Uu6Nt=e4#24Y)k-)j<4+*>&;7`2iVy3!-pxE z15V4f7~~iGE|huV%!Bmw5$7V7Q~H)!zCYP-d$gU5O}cIsrLWMm_CANmXZ8CTQ4gGkzRFdMD&?1wnLlY*VKfWZE=zyxik+kJ_g3n^4{W za&n5|0MR1Bu}c?@;ZDR2SY)ZCh5@^Qt;|~uXFb@vj0wW!ZK|d&?80716$pDJ%Ner% zXyqmozX)RA=RxedDL(eS86W#T>Tv~%eQ&h6(JS?1F=-N1>Oawb9VmNQ1DH1n>1wKI z!i?Z<4!%fxkoNv>0iE8=*~`^;49(ks=D^-kD;enLAj_R(ca7hcJBxi_XJ+)ZSt^Qj zvJ6nuUEP`+PRPH#U9llOy0}DjxlVulI~{lMX0tt`+}EXOfcG)=a9Tjk(uQG^$Nl9? 
zLsf}}Uim?DRvx+O8X72_>Zi~=^Bw36hJ~Xod{Zd{1}VyL?3j|`TNYk)q-~MJy7EiK zyfmtJ=f@xuOU%(h!e?)GZvH4~dI)bCXnQ4hMOZbPMAlBY|FFKZt_aETM}9!l8@kIz zFiiP&-^4Po53<#zWWDhT6bVB1|7^>@nr%aXi0bPhkgh_&wHRy>LL1dhe{Gul_PU+cu{VV{QTP_>OX;VU z@b`M*N^%X!eFf_~q1R8L-E~6yf*Gz}0ZqM*Q&xrC)2J|+Ktk!4s_%K!4KpQaJnN714$ znYiJ{SRy)Y1_IL)q`+w4J&7>oQNKI~t_k0-VYd z%)6a|U>rZ~{>yo(ZTf2@Ub-RqCwl8u0sSvhM$7i(HkjzzD>RIvbwspjZyriNH5$xQ zYjgU+m|d*k4*7#GA{E!I0dj0?T|w33pyz&Cze&NvpUTig=~HT&usXw<0_T4FQvV@OnWNSyvCWAjCAN1VQ45QiJ#gjN9Dgr89&J>7^6}^N&Zv$WPHKM zNJJWD4EIEd;S+119twX}m@&kz5CEXmKBxeOrWMdI7Zj3sI*%*L7H06aDt0WjjSuma;o(+cZ14<~@eZB6V6R1_v!}TLx z{S*r&0=_l|msrf^%|GfVe`x-XI32KPidPZ#KHd;tvmS~JZxp;^bLs@rT!`zk?Wm*9 z{$!C~w~d(ng%@mDnK#ogJ=wL);L?yuS)A)uDNyN>aUB{n%00F1-?B(oCm!AuDG|z; zAbq|xRknm8p#)=D7H7atLEYwj-)k@f1S9*b_nIzLuwPIhNN)zgSgG~${U>B8cYAJE zYQYPAqom=Rhbc-aPet~5LS)rVqDRUzCl2jlzH{tU&twDrJiC4jbT~sH37U~#(8`!I=*UUT&=gqPEisxK%go_ zp3;oP=dOk|fU3kiWxYj0b0gr3lTnfFPH29H?xs$^@oCvReaawN^V~c|zwo)(y({BD z&!PoIw5EKE8Usavo+1Yp1#1s@Nf?X+px}cz25+p-{)P3MFy0q?O3M&(G4nYaT_g$A zPn0DAM=07f|5iT%YJ(lAeR`@%^0S+;Rt5(P>$O?Nn|J-=D)= zZE;$i4^>vM3xd(R#VJ~auc+;dx-ZKWSvY;MT z<~gogt&?qck)yS7H=Wikf_YMc1nOHvE5gK%rxF&f&RhJjXexW}<)qZ351*CozA(xA zse|`uoK?5@AsC@J7!xGQ#R|Af{oF%?nLQslAU2eJi|PAyxE#pkb@m4{Iz0Y}g(NNR zKt%|R?EdB4*)If@Gi={gFc#tle&>AhDUj{N?UPB^%}<=MM_*Wq5GY`A zHkMiOl#^1=`C+-DZ<>pg%*AI|v>o6zE*;l(QilwENo3ekg3_wrpDkGQ3CfJTprC~; zd|&{dYkpJjW;3!L++G%DynZyBMBb$eSL*NKX0a-L6H7t2)x6x;V^Qm+Y|2}%mU=JV zn5flzC_S5lAERqlvLhL^jOnqM(Nl=W;!&m;i` z?tzwE$3WQ*oSo(4TMM{N7>!Jt$PiH5B}8<@!-~B3y^|{D8}@a~ z`^KFhILMCFNLuvixq+MHt+dY`3iVN*`1tYB`btY=y1Ol%TsTV2q^0M;GD9tV_#`;u8vdEcnY) zz5D5IOs1i1MlZFuQG@6Spw10Ge(oW%C8<^Xt9zAheEek*bBqztT+$2z&873Zm{6M< zc-3L4>V27tH>7X3;|Uc2>j;uj9N5jcdLw}!r2_~qLBpLffct_!g@0G24@VIP{J7;a zamgKq{|Eaiq=2cu>3Xa(6>s2vhc% zvR8TBn7NOy2cN?r61w}(_Sjy^yqix``7jxMlkJNVu~>Z)%Z_VhH-!$#!d(T&t5P` zK+X8gkoSUvtK1DtD;-3{|2_3(2jA}CmKw}U5c;C(SPV!xgc9wMPTl}~0|OIii~a@O z#?o->HY*F5RlyUA+kF1mGBXNB0lnf)t?He93}s?O6U=~Z!sux>GR3cMoSP4pk03ht 
zB1HUUpAngVcz1=_I+pVboGMlN+nl-csL4zmjy95*`Ee39^v7|xS^*AJcv<^9){7kU zJR%-?h>->VmZk7v@$earh`ie3N=ri?w3OG4O!;{EHR9;CDBSoY-~D?AAbRNY^CDFi zG9#I*VG22=Q?d_b`b|@L;*%9KWsmf(|C$6-3=ozS4 zK#mm{o;z5wpZ-pZjUP2VEz-e9nUc;3#aJcas>PixkZEOaQnxr2cZ1hw-)}q*YCUFJ zoX#lW}!>Z^8${Li&gpo-k#964d`mJ;+W%#nFpnP?8on2aQ>M=UCugDb1*dsNrQKM8f}+Y!Fzz3R9e(A&ajvn{OXzHG~qn->*Q7(Tye<^nAhL2P~g8&F1QpnhPI916tjcsU4} zxOoB^F4hNx_c_3HH9Zkd&}#c*64w*E_MLL-D!{|l+(bPDp!XY){eCj!+KHQd}&uuNwnMmXZsX#AZSN|BK53cxqrc+~=hbBtJ`h9@S@(nL-3n`Nk zFYo(>Ock7#Mn%j+TKv2hp_HrFIlL@UXxUa$&lZO#q8Q6`w=mZwwJU{Z1c`3D-buRd zv~thvI#FW)lx!g{M4(P4MX`MgEHxWn5OZ~_DGpNJLnNRBNe3E8Wf%Z7fdwz%82}p4 zUti&?^c!HZJx5IHm!*5+Rj@-gp#W7Z@c00hmX9IU0lLU*MBujg`XXRTE}DOxR;xb> zNEs3?R|h`TY=v*B`*}CMg?VuGDttFNL&JUY)bThz8j`0eXHtyQwO^zLDc>(O)HdC4Emy&@8iokQ8X2`ivLkfi!R!xK9!(gQdP8yV>R?@bV&EIwKFkgtt3j=3Ay zH@D@|0v@+*TeQbv4eQfpA;`(w+f(Zbf&wb*-=mR|)P0&n()zP@&y}UGNB`t%e>xk| zP3rTdhi1^EEr2tNu!n2vqYK9n&0@oHjq~&3(#A`Zi!b)Azo#gA#mjbp^|x|4oMojB z6Ue1{R;7_HR8&d4x$0RQPnAK)}o%#LVl^uXb+F!F)B)p`3E&V?_27wb%U zJSD@gcYYYX5d`2Q{~PlO=W7)=?N?F;sl8__)>$mZDXInwzg?DHi)eeV^r<^wg@0EI zv~8kS!yrvY^a!la_e@F`Z?K03>aI6{7<%+4V)DkV48> z1USfn1{)6Zfdf+}H!lz~=?Sbp{qRfzZ#!=gd<7E`6%-bSiHQOp0}QnJAd~vvH|>AU zaNNMpuoqGTOb|nix7WX3O&~7ABQ{xFfq>-v1c%%y?n{F}42nPSh(18Bm=tC|Gr(Bu z3K(mhj|%xH<_tzSg;XRjXI**~SUkRvOwCqney6^4mS=h{+3A%beT9^N{sq#V8mfun zWgnBAlNR?4>++K(3MehW6I-&WIA{epue=`>K$7j8;K}3Tiu&@V6eQM;50}?A-%G7k z$*Dc{xUSL!3~?Us%=?+MMC*chSDW%q-^dXOJzsg9RrM?RqDQ)&Xpn_Vqh!Vr(s#_y zUHaPjZoLoS$&GV+_1{i^dKgDle!D0w3c`q4!LQ}OCj$5tfP8ou78!s+dJ8!tK^{8B z9g1KB5#al0-9NmcHNv-v>EM?}E-9%Ie?hoVtH3I+`~8j<kjH z(``bncE>&lv;LF4_JnXjJ6Q!(=>L=CjJG%RvUmTV`3-a$95_3?fb{~2q;5he0PZ*= z@$b^V+qyU!n)OG7$GNy4={pY+Ug6RZVd+?!?-}q%>z)95N>*a3WDMe5QWuwdq2tupD$6$mf(| zA{Sx0rXVhyExx?pW7<@!zmYaw%@n-X%LqBaIJHqGHvXN<&R!`rP8EsT;Px$v$O6lb z6O`mYhd(V~yM}Z`V0A#WCQ8T?1MoW6-^oA!UU##EIxJZn#@0S;y23afieFg?F9&BK z+ciWr@Qz$}!pBOSRs7Z}4uHW2glhVO0rzyzwZ)LP$z@&Eja7d(;FrB!-ANn7VBXoU z5NKy%3>+jom9afiO`{(_#PP%A%Ekh^$1g`Y^-C;<_k`6fU{{9yTi##Q&b6zAtC=gL 
zqlILv<;g14*L6$D_spYEK0&rd^T4HGZ&X3H6#*67M=3dtI z4-Ckil^dNqrfvyJC5U&c-_Q7#V?FVc;~n4~g7N%?u$xJsPN4ZcxbgLL$9TaYNiH}C zg`h70KnQFX{>?E3FEcmb94+YX;P~&;5Sk7h*a+$%Q3xG>m=^GLa6|YD86liN?y@ns zt@g_gW@!7k?02HPPgEtVnZXiQMJ_?(k8mJAa(YjN2iZ6va@_kY{MKIxt#)&S)F5v_ z>SBeY>I%2?qhuM^ol={pu9ALYKB5~UUo#M^tN9$oCS9_e$$mXWFZ^?~{pXbKCY2a? zbCpg&+=vbpB@6qpwGoe|dB$7PehmX?lOgNDyqngen|9oviqvV9QcFy{M@Q@uuv45j z>gI=Ueb9l6hP~hCMj$3fgeQ-|ok@HpRX62>ZG}UR#lM0qUmr>0?xq(7W3zfkPJ!$D8i{irL1`qJDp4@C^XkLttrf!0oDz z*GnloW5GWW0)e$)g&^&~yU5EQ1_s9nSCB#md`7_<`gPM@07;TwFhg&!lsMpiHjWs3 zZ%-Rxk-z>o!0wd*KTTHVfS+cF0el*e|LyIHL0`Lv{Ya*1B zs111oszBKa(NA{#<@o7cDR-YGf70jlaPTG08?d?->cPiC_*&o^qJG)ibZB>?`$+gw znN6Y!g{>~N{Yo3U+l5RlgtR*H^F?&>mA1&9m!zBS?k<%9e(58W7hnZyM`U;B2sJJ4 z7^ThG#|mGfzKs2*6aK(bbGM{SuB}7<^v%Q7Fv@FtT!}8OY4d>D-b?xVeX^^K5J5;d3Z9;hqxLOcGoJ&Q81B#~y zsA}M)4<4={3Dy-jf4CxGpc(Ok=p|UB9cWB`*-t`!O%eV{MhjxkKn1VN$i9iR?PP6! zRA=84$FckPD(D*rJ=meXkv)y088i8!X(~8d^Lbljs&f^)o2t<=k)SuLj_m91RrJI? zP5uR1ju!)8oNX#eI#?EN=sKFkRA+e2(Q2K~_gAO5S|>w#*6gKEGfweX`Qz6*6gMi7 z)Z4MhE+!+&iZq`Umsr#0_DgART#qUql_?pJzjv`ACxcA-{)>)2PPkAGR&A}2#lPSq zy74EfCrjEQjGiD$!KPt7;H5-&){VqIYCRvGo_EI`8gVR?O{WaM(AJ%o`7yJ!r25kf85iZYvC;tLcj~xGV~Y8BUFs z@dqeAW$|dAc2BZO`555WvmGLww0~%+b`f(XJNIzC^v~)2!ovq+>NB}mhv1+8b!v^- zWm=0QK7BuI;T|`9SNDONstsQO)8nUACAyKpgM4#OH8&6WmKa&XTuiuthpwfAtT~LwDPQU`v9#%?A1~tLeOsiBw`?M%W2!=fSA23 zfY~eB7lQ;v`cM8AIG*6Zk{K>!s$+y_E468L2-B*5GOemz(`L%5+aW%A{kpb)oF=pL zT`qq{#)zFGBO&Fyd@uB@FF%S5djZ#=CMfi!c%fF{G0)1k=;N?(w{KcoC3MSakZ|S9>7w*T!M=^$6OTHPMLCi;df&OZ zpOSJ*Z1%3AD;LQ(RjQp}mqUfSIutL}tOf2^Qp9k*Ac4GOzS8??!dq^(+Mc~qV`6ox z84hGrx$ZSXOlqxtp^z8jV8j5m)sLThuK)vZ>xj?J;m+jHSD+sMDO*v7W5IdtrY)p| z#3*^YIk@86|G&}#ZeZy~nFcFSUzF+?Ew9=$Qs?9!#fG!wbC+GOJKyES+19-`we@~Q z07_vDr{mbTDbPCYZ#7D@jD8yLXLo# zMz*f(Z$6V(>1_|4@3QTI7+g@Yutz`(6Fym3EOY(J3n-sDYzaUe902NI#6ulJl#XW6 zycQPppIP;~jWXUg`8%9UV{+i|PUih-=KF+2;)IyU z0s#+XRqKr zhGbdQy=byUY8B280&~7;DynDJi&aB!B&vM2?*g$936OvodO-@Zz$+A5sG3?H(!Pv5 zQcX1)X-*5AT%6!3e8B%b#6Bh<1!g)=>?u&$ 
zABf?rYWQ~~8v}SpGLrBqvk;tw2oHYKM*!hrA`GvaTlWI~?^KOR@dtM{`04F|)EY-X z(MEdt0{>~q*%L&?!0}R)KjYhWTyY{4!ASmvfP?Gm>a}|}u;G*W+Ybur4Ab6!*2WJG zK6{&Z&Ryf4f5TJ7Ng`Yki!%MTLl%58dcHk{r~NwfY$0i_huy_~oYE26=l6!{#@@R) zl@^3}c1f4@24LbUA6hIX=5g`>`r3~G*@=xXZKlQV>T8zzwa41nmcLMn*W$d#hRKNs zEmyBgDtzsd%D9;@KY5FL!ToI8IfB$iFuqOLmdItb68@GX#W(o2_XtNEt-1RhY&_9) zyDf0_TwH~-UyIz@dk7g=q73RWA9(QW?EfV5`Jg~A<}c*#544;9|631+*;ly5C56mZ zf=kEhR7`|r9PW$@gp3mIfQtpzVwPuEHWxH z1&46Nx-5^~xzA*TG7da<+9i0YA=hi%V3Jj%;JsL<<)g)UtiLpky&#l{(^=o<5j_}s zq|3*PKecBYpGX<+ap!e6p|JG5X)s!9hOw;sw}OLSSD>NgmJ) z86q4U{2>~@G32-npd3gj;;HFoh;UMMMSuqh)LQ2x>OxxJa7YVmuUR)JJ?3lrBp&nmx8B;%< zLYbj?xTY`&mFCfeJOSbc>)<$7IObc_;UQ$bp76g93x%2Zo1mm9xau2l(qXsZO zUF)>p+ey+{C$sEN$?=W_6@h-an>sr*B8)yER2evsOJST7eOat1mRdWb;00I9w7I4+ zj&4zFcx{I&xpgl3fq%^VVYflACDn%>o4jh-&+6!MEdD@c>}c zIRn#sZ;$|we{TN!?MDUv`GHIeSAV1naE}1-R0tafH@FR`EyO7OipDE^>6OrB zTfzU!JmQu8X7(St(zYV-C3cO2lO7j)L4)og(7E>Atv5uzE$G-f{2KcF2>EdzZuQ)K z?{gK%`}|^0k+i#aVqcbX?6lf@L!QY?z?e8<;{~VEm{AL!taPM)ZsdE!F*LJ03zz9V zb?$Md+VY|yT@0N%3gwzm;u4`VfN0!Cbh1>DI}$o9fVg;HufS}!I@{G?=|<5qj$m7_ z$9X|772AJ{As-&{aW6 z64>8CR&P-z*ou8qz8X35QF^+a&xaJ)s4E$-UY#T5dQ2-fx#qXQ88GO=;a3(g!4v!v z*tMR|NUXDR19q(=P1Bio&3+1(%GZ}~#d+;sCdD+5SOgf~OL38dWi>AsvXQD{R%$`f z-z^>OC2&{>y5B;VTQ5H4Y|B*aj`fbuy)68=r|7(NgKROCJ2^Xaop!>#4of}Vn}U)E z!Xc&xM`-vkScE4)MGjgn7zuwlX)yA}&n}VWc?|?-XjBPQ zaB##KDLtbLh@7k;MW62VlKD6VT*HBp#+l;jrx%??gkPEyeQkY;y{DVqs+ZE!zGB`i zJ*m#7%B6Rjgx=!0mYJRD`EXKGT)9EB=6#PcDeGork*n{^C?*JQzGYSQLo~jASHqLl zZihoB+HQ(zh(WbZ1eclPT38*~f6uLSI4ifX6P@?<+|;+{tyrD-`(*5Skg@Mfu;y&e zF)p#Nl2@U?YQP_U7XBy_M>z*w~(1=Xep$Hqm`zVf#nM%f^Q^ zF{Sk(jyGiTfjQVp4?6h%?BP=xof$hj@3|r<^8)3(EN{D8T)z$D#|?1n>zEv2PSp8A zY4!0G#wS0d%*CX>Wb&30cJy;#KOH^@q9{nt?X=ZqtTt9;yu#6WYymD@FigG8qdVoy zYr)ao5W*7H3ZEXjhbRW1_1t{9xegR2R;;%;qiGe8&}Z3QsCdIZr(pN050Clp7dLC+-3Q>so%S9}pwQp+^47yr(gC?AJOyWy zBp3-65n~ZpGACR0YQ+;QoyrzSnfTLK!|;Z$bE+972He#B?r{7JbJhV`G2r0wppd8E_!TuwDtI^c9SB8tKpnErx`VmH+O8VTOj z_TvNTd*f2iUdH8v%0TS01M^b=6Aul%Mc|&!V2eTp!ILij$)ELMxqXNK(&|8shn==2 
z&1mwOzm#<%ug^i5ypRtX?&b`G{)@`UvpXW8^z>R3IdP2lhWTh?;C$rahaf9*ZP%;<mfe~#c!hMVBGRauP2khPyE5s|FOF|vsYA$#x9!NJKm_DsplPPR}f zBOzqZtYjxy$p}SRWtYnD`98Wn)%A7#{y4YPRjymFUhmiY^?W`bkNe}{BxAG?jLWrw zMzd$UczZsE5DRJ{QYI$R%mK|F{<*{X3;}n6={KjuQs36ecM*}FH&z_Bxb_#4a}@Aa z9-^~oyA^Nn#G*vQoyXu#j$kEfSm#kmbsW^w;Bta6XgAsx#nE>u-($!2=2X>QDW*SE z)z+_ZxX%V#m1f&3K&Pl3m8T`0xglbTz-ddH68GHWg{VTi&y`JI+ICHjH>R}-=u2K^ zQRU+m|CKI1I-y*h;TyN^H^e? ztxX+WVJQCBiA8{P>3=_he}4-+Q`!yd#pB}X00T&5$o|(KCcFrLct^FB7j*NlxB{3O zwCO4s5Tw~cgCApuHHR-$pxiLkgdV>I=-jYgzZxN+wnARM_>CNK-5|NiaBbw7WU`2x zZR}vQ1a~?R6%LCY0E?*Nt-((D(?NVEE;y;B-wB@M7}q|(1R`;++58 znG4fjj*h!w{$&0<&VY!$6>;~-{?^um06ebv| z3o8kPk0FssN>T{Lu66n9h(vB{&`D_m(n*B^LYb=BH)IBV2T4en-pD5XXe^!GEg*Yj z|5*QQomm}MnCvMM-xt8K?i!pH8Jh~o1~h{hvHz1pC4D_iN|j`e=>=kKACJmy-KQ19 zJ{0cxx$r*tVO?Ip?RIvZ<#gcv>?ATJK;&!!{5w>>z`V5vk&%Tf*p56Ad4sDRxFVdb zxvy-GX#V68J>NayL?rU3JD-M?zvO6A zRloT_H7!s*=a9EWTXDR>LhR_rZ^nJHu9+GcvGaz*bs((nP&1oCz1J^;Gn~;uS@VVW zZqX5!)P>akHzj#_8BcSHbaX5n#{_%?UyrDnXM#Cub16=gt0@t(S-1x#bl(HU%wI() z@q2U|75!b_S28{PkszDcmCry0dh-<`1iA9hAr1tBy{Qp_aAy5y6$B$S8=M)=2{FAw zZkVMd2vvRGOI6TDl=F&2gD+nwC6`{+FLpGN&SL)U%q#iL`Hr_^5YGZas3ahk zp)TzbQSB9r6-&ANbeWHoDRYwew$5bv*QWjPeUve-_2DP)s7KPfmycR%q&A(J0QmDxSam{j*icLwL6-z3!j!(RGzAmL8kQ%`24>xcPvuX2#ptz50Dp zdi&Cg_9IV4^))Jd&6bcVh`wLsoSnM5>b0ks{mec&*6ef=99Hdzj%PPZ6Q856E@wz# z^~l-Usy;_yl6r7%?O?~#(D$%o{;n?nK6Koz0a<2^cHsHn7tT(gqyBT#{pXF|4w79i zI`e2^t?Z?7|0k>?^_9q|PKxhB^sGo}1`9YF4g>On!M7KGrGGdSF0j~reIOVKkt*lq zZwB%85#Iw-1Lk%UyERw7i8d-aEK5e_nq}Q0;w#D1d(uiRfA0`oEZx+eQBHN)*TeCP zVqb1%A36|osW-WOz96zJ{Om02rk+{iyHcz;b8=|{y?7*vXYWb3nrMol~6tj=r3rT7AZU?bq9J$FlMQwB}*73O7y%z=LWao9qwq$l!DAmBmBCr{;QC+|2N- zEgf*#l@q!*gUfCYSDgUv=XHE&f+kI42Px#OL6vi&&drsP{If{+LaTYeG;!YW)6nBk zSt>o?w2U^YXGt(?>NQD6&E#U{@v)O$4iTPOXqFx&)8M0#d)&Q)1zs9J5qT1${J^rGwH~cYRKgf!0*18 zWp3hzo<4}VQ6xZk4)$aZn9KElZsjmi{JcU6$y(tXf#t9$Ct#=Ts9n$mDX^!lyuKa} zQuv|Z6^XMz$zpBs;gE{RYbXern6_?2i$sw74ki%izC2VFk|6+F25HHs1Ts%7-Ku=p z9{pi+>+6Tfh2tdoRh?zv6+Y#eJemLoZb7O(V3VK7ZY4^J8`WFE5pI-V7BWNti$ 
z-BPBW>vntf)a`_74$n;%v0nKHpA=r`Si3?wh)@gqW8K|n-DQ&`9Ibe1kYVU z#@Tn{IfwQ&Z9*BX>A1;2-dyj<2AO5?HOdPBe9ku4si(_H(^{_f|RU`|*Hq<-76(~N@x z#2#}6*lJjet%V(s3!wOe(#;a0Mt?~qpwp~|-#kZjzr+hnmS4K$L7ZFZM;ETwbPkk&!nGDYgm4?+K(KmR5Wnrd&kd|pC}I%;d*|{&Z=+tS zqVQuVedUPzMA3zL3@%k!?EC`8n%cq1pX$>WDutd-RgZpmnjod7=pa;EO|bc5JPXMr z#)9FKP3)YS1xrEEY@u8;*$9E1^vKmOxKSu# zAq(;#ayC)u{<(ymEtk|tr4s|GVWKMV?2w1idPWA(26lfFU9{r)AG79(HSdo%PxFeu zp$L_tm)_4MkRv71C^>sirdsps8+NcEVr|<$)z;_KLeOZ^rI3S9IJVYi1&lx2#tUfl zA5L9NHo5OJQxNx@!<^36*tp`FDf1iIoHiiSVyNp-N9$6A{r#b~r0^)3fT`L@x~)`h~n-ZEi3gSd43WqN1CrY0bvj`Qa=+Nd|_fe1I*sqzB`x@hz&5$9WA**zG zO0a2cS!{tG{hXDQ`_;|KLi<^fP?r(yl2;D}KkYi5k*)Sx-GrVh-dN@fW@t5i#6zix z=HNYWVlAb9bX}b^=3dFFw~r-u{~){~aH}j_0@Jnb8%9iQDzYI*`I55HIJVAa>%_aV zBfjN6yY@KWMuahlOmatGqpgb-&KX)O_RuLXH$~9R_?O-PdZ?3xN6qR*i=v*>5ZyNC zqN&0MBF|hxZd7MRprZKs9WhOBXSL`-Rtw?h=mw!;A>+$zciU1fUm6;EI_zk9aM(Pg zDcpMS<`n1Pmq^BqFQ1>QN*(K=yKm?kE4B6{g|6$g=3eaSRkt@Eyk+$rP)vO^nJ71v zH?K19N6pn87`MFkzG#CJ&Z8X_5Y&m{<4`qi?(?|)c z@=D|oaRT#wce^$QhlmqM1mIi9MTE4wEyzWD`U>E>3ne!n!IFrBpwO-^)_}hPY4vtetv@f4RJ&2)JO7@2+}JW7r$-Zp72&~!NK3!U{EAS)}_m|_;oeUpFDcp#whJON4wBX`d+@!lMHWq@p)081#( z@DJ0rw5DhpO#-S9V0SrC``pLpRLEw*Amx#I)vM1_so$U`l>*1CLhi&FTytEy`}9SU zp5*Iq-jR$NKi$YU?C+Wjr7^(@GQ$e)*tT;3Jp+ssc{E)e9Auy*GF66IkQWYZiWE$+ zzd)h(&ut;XbYEf3Li~}r2bU&T;%^TEu|N&(3c-`_pkw^7-86aY3m${9F~e4NSlY7b z7HLLIqvvAV=o0~!GO@az1c?CV6UE)s?;52~)8GK?&n?O#OJzY&EGo@gPae)#?tv*e z{6?$N)05HzLmH1nucwY(?UZ+5OK1{K5jP9ljRT1D2Er6T0eN&)FQB9@ zXrX}fj6` zqZ0E9oT(0ze_3^=DXlTwXo)9sYmv6vy$Sbw@|&Rg^Lp$R*w6o2m0y9QzzGM22^}2R z22s#_#z2Vm_CX2HKAs5#CM5d}Jzr@1 z#O9%91ukmRKInTItJBsbBRgvT2HC>zkCcgQy2z5+8D5z?_5fJgEOV>8TKm;?GS6V- zI`0c*yaM9Qe1Afk(mR>(kAzffKr&M{Suc1yba}?gJ@e$Z=rO*RO7(myS{uBC_GCy^ zUT{vS$$|az*7I`|3`h1j{ve4d4x9UrOACUz6cpHDS6hLAS3rQr*&e5%azWk`oJ$s( z_?UDxEHpOoG|JRlzsKe6#hCjq?+B|t&vxfO&!Oyl+fDE3(A?<_mD;3{>{f=p$%Kx@ zP5wn;Ms`H*j82Y_u|ed{IJESA!V`|)}5HJ@A$^!61gI=l0Ft`OBg z-ye}ZzXjQ|7-DL=evPpIkdfV2?+x;Obw3CTmf3H5##qBPL;4##N_W7cV~?}|t%QYn zR5Vd%xcLR~dklFXFXBt9@YLPxk}|>taxt$hle~r)q}1q 
zwX6rI!3d5H;-m$+vf6edPbrb|!%X7qP#&CpaeWmD9LzX#X%xVoF~8jBh^yKyk`d8B#Fi`uK>Z z1$;!4A8lX(EDT~?*zrmSM2%HA7xG^+ui9e15f<52G0FSlL>kMo)*pv*bi8{QzSIFjNaxFSLs!D( z3Ygf+4plW;RShYMGAJ;XE7?bcXy5N*+M{Eso9#arq7)$*KNw#*Y;Kc0Dj*$bH}Yu> z;wJXTfaR-5+^*f}u4=I*<3(BhPb)8NB$PDlcn0t7$;EzBS4|xeOtrTzW}!0e-;^jk z-PpqyLZ|@)@ml^GTde1ci`bQSb2;?v*D|K66>nhOw3YZ@9y<@I=vy1RJ0jmPwTRahU5HEMT9o7oDy@-qL@aQ?JifKpa^RfA;Ld=}RV~ z%;fi09S-`>2h&}JyaMWVn8>IsRq;~iqxbzUdiBmM9=<03ZI~4y!Rk*YAo>*qUA*IC z2rFmfgtM^VQBl*9g*^u8$H-&&Tfj2k6IDom{9e0;_1eW!7Q?2}Z~+w#Cfykhua|Ab zfxyl2$0wP*o0WnFhw06e%oE8}&&D0!R_2iNbJd<}zD#}Mt~b@OwrnKhQf#BhAqTd%F=4IWDGA`(!C0u^$gn?K54Y`L~#WtcJk*;r2e z&5SyE>3&Co8n4UX%ZCQmW2;KzpI768;$1H09ZmhS@Ag7 zpq!lk<$FPnwjFWM<_>1I2yO?3a6)WBG-VGGgnz$4PXShD?s0v1{r}>|y;tIY#h>7j zdN3o$$BkP<-1tKzZk$i&Iqq^pxu2?G5>swdv8F=$AuCYgW`qql(tENdcEIIg=qL=a{`d6Qe2NiF8mvhnVb@#D4w1>aV@tDbM{soMk4NG!; z@mgT!Big6UjPK$T6fGfc{OEZYl25p|4j&$<5~S&z=+a`vefZvy6*45}(u?SYML{0d9bL~DJ1ZxE3`(J$U`>!DB4s3YXNRHDe=e;BXgn=2 zo`7n%My4C)R(7_|_z~xC$>(KTUX~5|Aze-HXYnUac;7>^jHYM!{zdF*rf8(;YS`Od z%|^savNr6ENuZ@)sPAE=@KCBPo;=G+9XJTRfN;(L>CBG)qWFwCg-0q1$vU)sx;X=> zI!`o?aq!2`cq)EH-lu8ZP6qm6LcD%BwA#+m>sGzB&)_44_GgSVt(v6a%-VQ*D58$lVU#uV7{teNr>&AW zZj!v63WKcZod!VvZKAng51ly~S73_rXu9A47Y5-c=J;3+*pwGxbZ&vjw2=472%;-F2<9ntJrB%7Hi9^Nk%|i?v$D)$_Jmt&3wf6kTO)Wvw>Nk@y?Lm5ZCo4{} zfy{;5O-k~M$(MX3)LKOBQZf*Z(Px)4v1W1_+Q; zVs}PfF~6|$*WEu$H{iku+`jGZ<4k%82O|aS)g4u6ZbOt0kezrZ^q_p{z1xbOvFg>uB!J%Xc&8YK^%`s zBEobxO_(;6HE>kaJjK`H_I;|n@q=~H{brRUhUOE26wgaEj!nLN)YS?|cemNolYxhX0syH_RHJIsPCOknRuir0EHT@ksZV46=$2FkFQA&a*iOA;UU6 zDLeyKh5xUW`TJbFg2Q-$Q`!qWM$UFnnEh+whqzqNitq5gHuu5-uF>zG4>t<}* zDj{Mp@ienE1yQcE5qM_y8@H`p5|Z89JCifgp89HCe{Zxzh%aP0gQYd!>xN(iQwwj^ zWAdC5g%HAkg{k`e2U6`XjtO7clM_dDB}}Yp#jSAO`qyPRJa!>pR8!d(;8UZAA48Df$?QKf5+`vV&_l4>*IBAB9iCs zeDjDwqQjxJw-?f4gzh(Y(BCFWr0@1NHK^MaT+WzQ|EAB5>LL>vyiQPshy2lPi87B= z22%>W^JDYVTwJ*iWAZBHWFivpHwCT7;#wd~-IJfGQ*!HY=~I_d9|?ny)t8YHg4vvX ztH;rbEOFR5-Zw&;->L0=JWa?>7q$|Gzl0a`{O6*U_g#%L;#K}_Q*ze?qb)-tJ7qGW 
zC=o6A%3gw6UaJ_f#H9b&QypmtOp0O8PDvnn=Akd?8Yn1$0LiF3o#au zZwD2a=>>bVttWm@{=$uHD_;gxl#CWD(XG1*0jv~)%5bz$^~-^6BtuVzY$N0Uv0ACq z8Gm$;I?E`YA$Y)ER-0ffBE`s7_eJ)Z^Gv_RrclK|g0y#{qZsR5y?P30%HJ#R_a1%rO^u^QgyR)*%xdAE6X(8Dg3ZGQ($<{x$;l%?+~R|Z5zIvNH=>xRw*-jL)quzJ8wZ$~?^ zx7-7~Yc$iB4<3x+5HE}Y+K!H~Bn$`f&l!>}G{^toY> zmIw%`pyOwabATqF6&{43c@gG-@C+gx5`4uUzQPTlN}zO*(ZJ{cyM1z&8emY)$i`qJ^B>xM%Q_kCaLO1q=D4k z)|QqF%Xko$@%P2>58Z+dv_;WZphh!8re0P!d%Q68f2}0eh^>hiUqmbAVk{`-Q*yEY@@#uQaUHHe&a*UlVFbt+@@H=Y3`THd^e(I+c@3GL=QPD z=-TdXkm9_*a$(T_wH#}zx=P3!8gu1!t^msP^GUvIM)q0A(-?^AF8q%6)lresvq{dc z*ICn%62B*u`g&Jfk%9H&;Y_ua!jW%n4HPQ!Y9s{C2{x;bzZ7M?S0R4I8_eG?sKz*fe%5Aaq{o(opr`2zB5MZ26sbeS92b<3rflt{_YKQhR;8Yu9;+&5dhtKy!yqjtlSihP%BgaV8 zJKiozc3zQSAB?1nj*)abWc}DmgORk@(WX^OU9?{zY&+urS`IIm>F}sv9h^;-wf?mn z9#Y3C$8JCpMP}ioo&pj~3@^ybt)9Ol7k(n6#3rh%BKv`ZD@sSe=wY;m%;-N*)U$PX zt`{AN6A7f(qsa|mbtA^INIp@^Pjd)M#lFlPDA7o@UwsfO+ty&gX5v(FJRVt%PS5c7 z5!ToOint>Gr^ezZ1hez59FwX^uyDn-M$fz4-*By6JyJqJ7Wrr|f(~AtuQPn*Qg%b* zToCAO={J;GBe;p#7C$APeMwJ%0ONH0-AUcwY%L;89Ony0=Ll6;0-Inl5YozyGGrLE zVtDMG`B7+`g&PWFj`*teH#X@x*@u?t7q!Qp`99vw?0qi}Aqvc>Zx;aobXW%bEdb*r z%UdC;DG7;caypAM?%Cw$TI*||AQ%0L?nc#xDa~<(_K9r2S{MO z;yollcPyJ_WIAsRiXWqY~1+W_%|Ce}0_rVXI-Bt$6RGwMGCdY6iVcJ2ttkbb)Ih+OZ zGwv_aYebPm@K9aP-8A{C*D#UPMo96^>+!XVqoo34t!hP@7NPj;3HTi>NjO&=#6wimLVS$WwC;Eq9$l8)#ny z3%3ssL-;$}y_19%-@FvBp+<$yjU?S$ly&FR*CX+g!^ut_zb`m6fg1mW7)r=1`~`Qr zPjA)1?C$|G`*U#&a)5Q8~uUI>XJHzlem|LOn6v{uEKGAG$W*_N?jtNlOC z>3Tu`W(=l{RG~?uRb%3C+@rV)xfUazGOr<8*!xZKWwE#sp~$kc!-5ADe=@t}*cA5; zUVdt`zUPzm7$--`f=YmjXqH`Y8^f7q#@Rd`Ts!(?si*Ie4^F6S#G|Dmqn8E;$=@ky z8_?Gc%M&#}>(?xiA$L1?55}O^i|Z*HN9Lxz5tA~Ut#lXZ{cB(;5cBqq8YnBACs_XN zfzd8=L0RY0zm8W3^SxC4xG9KOyop+wVz@9CkXjrF!Jg7V56}qi&$gYU*VY}WB}HZ} z!8ThhLwxyolug3=PlEot*6Bmap9`*gc~T{bRLU+wirG0v6 zisC?QpG3Npl9I)QV5Z?ym0rQ_;-;P~;dEM(rb@n44h;Y0kv^Y3og&=(u3~}M;nLwG zjaiv5*ow6bR}Ojs&12`KQhA2T8fK4W(jDCYV$y~oczll$rJQ;~X63BC8JgPDa%5fRO{s%CVt&jwo`9lO6k^{=x<9}CvDJO;60o$J-XAFc6pY^al7sI-OG@l; 
zsaA7Bo1tycpuE@yY;Hrx%uK=mVGBNPz@nJ`58?7>RogkJVqj+Ki#o8@FXBiMCQynO z8xoVXgdo#KKRn59YYXW@2Trlqo$0hpPZWGMg&nmW31&XaTD;-6Z=i*$bgzZO@o>Q! zLuyLCN(-mES08q8x-AV+_NJQ=-98bffATN|;uGM}kWu*8a~Iw#qFl4Et562She;y;L;UMGn88!0yBBY< z`T-WJr^){A*EDnvo>VL*=xQ$z@)T!$cY`fU(L?NPj#BE?mGs;$ExCq$rwX1OjHeUm z_Of!tM0BkmJI%Ft7-eRbek+%6WNlW&1jLK$?@tY4nPpo~f_$G0UV2sHMxz!#0 z>o5OD!C(iSVJxDy1b!cnxf2@Y;$jXm3L)4Rf6lyOr|P_>Xt`eVWF+1`2uCLO%mWG$><aPB=f=b@&K-oF&td|gov|$#}~Guw6q)q zOj-f%z!7;=3Xz#sQ1fFC*3O|zjz&+8)96*{CQMC03GliWu$WJty;w$+%Y>6WBko)4 zbx@wmK#S}_ftf{G46#F6O9raS!8c)kjrOFNf(u2U$mW#(VC5l+Gnj|B;|y9m6&|H( zl-lQ>!Q^#T_C#qI;CtSezPjmUevFS2$>M{acXOwK1hy&v)j@@m4+_utSOwkKYiRq) z#3m$>Bv-6M@HX3M2z$JR%2^SyH& z!;PD9U&l$xcspg&0l`|oJ+DvX6RD#)xlD1td7(s^bIVcxxeB+KS!F!!ET3rdCGIdH z99g=@gwau{c23>&=!~mM_~f=XW|buXlY&I4$TYV~Ta5O4rk0U*jYvF6#iytSolxP3 zA|K;nGQ&HkDs#O}z6pm69m)i#%`9q@bG16~V?Irgy$gGc9PVRyeKZc@05y;P7 z`tRoZX-Mmhum0J06E403y%g&iIq zW{L*-s1qDdunGUvv#QpbJiTZ;2m^a%F1)%4QDYuTXoOVeO9^m?-u(L670o@^ldtqN z+?R)N|aB&Iv!onINupStQ%kWG$I~RG<>wU9Xh2!UoIm5 zlk-@op>bPvj*a$XIl$ll^Qn|~>~#aL(wgnkjLJ%6y%L*VL(ddRh=lGhaea7d zxqu{*I5>zWSg5gf+Y;dNUAo(?BIY09Ach?4r+Zsff1U_WOfnZDh?({*ab;Zl+`v@P z%ddZvsWw52HB!=>TGswF-=V4o4y{g-RoOP(KAIZ-&eDa_yw_E7GX8q^I+|>_ha8RL zXMOG^POJUg`zTd5@WHX+`iU6Zmj<82i&LMP+msxCsFkA`IH6g+7U%PR5Km?lHO+yT z7D1Ho$=Gj0jK-;rFBoIr&fT_3Hah-lHJvwtL7po#ctY>|{JQ~9&!`1~Q zd}bR^ZbEc4TK#Rwn!h}Z<%pCm^L*r*7v>HWAS6<&W$O5v=d~FS7=104T~U6Nog+{x zZHSKYe^nZ9Y;#9pKqs!K-XJ}Xve|T)na4P(Nh;-Sc!N>?5H2oCqC?e{cd#{;BkJ0? 
z>i;6OeDC-pNBj>$UW!Id|1CKHLtk+|G$8)=e#8JQ4pNBhICiqdx?`|#l3DTCW38-R z;2yC3>Nni(#O7_3?LK#c(EIq;CnVpG#Ss(*%V@`3+J^vncivWPDc)0;+l$q&5!yr?~M9&BKqP_YxeXdxcQe} zhs@2ZVCdwvc$!jCMAYvLL1M*d4h)@8%H-U)p5L_6oo?h3xn+0^E`1-S@rFZ%w7l+E1Q@MLPY)T0CRpRGXJRC|j8Q`MM|+N3*|d&TzgrTmDrSii?fwxW+cbF1V81gT{XdZW}t!JtF`|Y!~?AI zPueLik5DOaH^n?zKC>jw%6K(YDdtIn0Ab$YV;djF?|I*VA(%-_+zq z2o)@bA$-(ScX*1Rm2VFta40;WnU737yfCI{#48T;gTK(7|Nf8M4lu|3Q$zWmeQry% z85RQwd>k;TTzJ3%vz;-8TsrWX)~$nGaCt178WoExReN-G;3F?r>?A6xIF5*y}`&5~+-!RakGUqm%`c)%t zAFRcGO1@0OQ!5`+ek3D#cSZG=bB9j>jE|A#g@!{o(R;Wugb{&1@9i8p5uee%?{#0I z0t}lkq|^~=eUCt@Ib=$?<0TbL`L+Pz;E|J6#$y40&v7km2bdzs)pX>IU83l{Z|f-} zzu1PiL!){(WCP=cqaToh?!X=MnZ^)Wp+l%vf|VOVC8&|J^)#71#NdUWCOSPR`tW?m zjGw1s;^Ec|kMUvpuh1|n=hl8;qdfxACf!OW6>Zw8M#LfwC5KoKI!iVlx+AwD{;nk7 zxGz27)#>|3bkcJIKN-!cx5-*Sw8^}wPCHtvNdMUxz?O~u#Z&1nwocD?%Td2lm5g1aV?;PHQI3#dAeG=qCL&Q zj*GIyQ(jR{=G#-G-_BQ1_C2yoW#05tXHoPur=y5#0O#xf*aeZ~KyzehfMHg7}FQx$!!;QqbTxYw zGo;&zT<`Q950&96kqjY3=-`dw-MNEWAiWE z{ZEM7&9q{?^cd`RKeooE+Yv?}l25|Coljz4I@d_*X4tS_GEmrwAEl&@KXdL^Nu`i# zlBlP@X>|+ska<1$aesT}o3d<^jMo~LY=!So@5j)0GB;}yk;(tmA%RnN!!E8qYHhbA zUbnr-VKcn@?T6Z`bZ-d?6UqZX2l9>`^QTL3DOpYk zCJcKT$5%;eba2NCr`MyIG3Qn(TkWMW7n!rrY_BG%FLnE0>oex33hn-~pzC+UkynY8 zV4rJ+&e%tH8v}-8YJf>k5(`cm*>*1GE=`9WwE*;o|>y<=M0G6tx(jsBql z)s$L)*X#7@>qIAa&^!p3eMUChyWI|Uas7W;wk>X7timf)JxlYYA%9bWUznYMCz=#)TQz= z1p{V7Kp>6v57)iQ4;bQ$+r_f--oWQHLbm9=XYt&@%{5XE6tJ=hyJFm3l^Cp{Z4H*g z=K6#Ro1a^{&@Aesfsvr)`Ny#J6C{(Sxar`!Z>>tZRu14XqvBq{v!82*Esnk1z6?}Sv~=euoLS^zht1-L2u zmsP)TQ}~dopG%GyzCb>%j)D;DPLj){0(R1NllvUl>Voa=E54YbcswXNb-v?fzL^}0 zE^lkbpkpEBH|64>_00Dr3);*Z2!9Jq!Ec-@R)^1?Tk^9>GyB?@oZq)!l(TQ>r-(e( zXZTd!;nkOOaq%h-7;`zPYMw;Zo_P`w!^}l=3*k+<$bAKL%fZ$R6Eg53`LuZ9C+J=@VI ztEsKA-5%Cf-+qYyD0N7M4t*xBTu|-rBOu|59xMvhQjb0DGB*5+%E;a+yy$~itl#;i z&RwwPsUMn-}08)}F35IBO_%9Gs!eCy3gPSO(oJq%tBF z9keY9(R<=h4o-lS`^6s2Q!_-apj5hFcrH~7Dc{+k|IXavHWBzba!y$dyw~)xn&V5i zzU`y9F8(HUD9N+R``pt+hlXcV(BIWbQI)@T{1|1_ch7@Ai)&@NiMaLstZnS}nrADM 
zQa_1Fn?0Bpwt73IxtsOzU=VG{qf#@Sn9~y!53?`4-oh%mKruBw0tc~MV%TyTSt-Z`+(V) z)H)M}6XG9xlRw-k7Nd>2W8*~sQJa0*Sn5@bjahQJ$*luFnZ49E3 zcW4kjR45{$n0N*UX8s|(vw!oKv%jH(lrALRmLTZ-pD3fNAD$bF`Ja^d9)pdIm79AQY{Lm z;{&xtPXb!c#1XQreAW1&P;HN`K78Lo>gw4n#3U`f7*#6uzX`mpGJFc zm{QVfjmN2TZk6Y<`(M0ytzCih(q8>TPyIIjJ?Nub8M|2rky;{LL;F02CzkH{IVOG&Y@?FB)*?@Nv3uK|j>oU0 zoE0(ifSAV0a};Ud!r3HoT?mVXWkLEdJ2vigm1R&?_`p0VEnSqX?BDmUd0GyqPuBVh zZbQUHTtz1pcd?&&C2~K&=)T;HJ~AX%N?`oZxrJy38qD@{(cI-hI`p6VLzCG;Ur?wyod2KUckge`_#3U+*t z**Uvx13i^pHSs!%KdWAq6vnX|M@E>lk_O{%vlX@Efxhx`x81f1Y5O&%%BWjPg8LDT z{eVE7th$W#fa$Q_~G2>-=Up4o|2QtiQ zq5~;br*7Y;TAq*n@HH*UGR(g7x}J7~&$+!AgD%#7*Mau@J5w*qj=O2|a|)L3*N)?; zv|X<~%NL#@ zbrgZ8m|iYZRc!Mwf%P~A>mek#!|`eX1S4m#i10wzodY1~EHI`ZHgR?~g$UH`cnoBe z@Eg-PS^ruDGLCS{1m-O4Cup*eQ9|&iR(!_M{8AL`n(L!?CG)vHj5mg0OAlqF6JEWC z5ie$A0Cif%<)ESrfh6mZsR6O1{qLA^Z*b?Lr`2}xjR=CS-b>!0-u>#^eXE=%E-iwL z2HfJT@A;qj>1Hk!$5o2sz%9+Lgb7(CrHj3I!@KJ4;mr!hICbeUTARSzI{Pcioah7T zPG3xLS$}-vG0b|8fp&Z}c~Pe~ejwN~v6G@JALW=S++pu1ddO?w`5d99FB!b^2Y8-= z-TcgT_bsM(WvT>jo4d2lCtQH_`cKR`j96@NZXi?wm68Pp<6sJg5K}v6#B}tB+O&bj zY+!Ku1p_V3GtS6p2{moVqkgaw?g(<6B7KN>`|i06S+QuQ%GT5*G=EPr>9Z;8(aIun zhU$n#^g8+(F-cQNM0uH{r%u5Lt6T@{Coc4J8mhJd}?m^E^mb{(AD1c9wT1-DuR3vbF(P&d1o>Qu2`vLPnTS$pLRuN&Y=i z7RvxNh4zvwwygZ@#^7MLeao4^Xi3{9+6T1@ac3xbraYeU+`H0Z6!m@p5& zf%)F{%&TdwQ6jc}-NFmI@E}NNuFOZ<>O>ob-VW0U&C8BZ%Z!h+w|G1&*L83@DP}MB z(%1C@(#9jIB3Y6av}^(PmVHW4KHV*>yioSjyy5y$ z0=KUu&vxBX`j1P`5m?0ESM@)dbO1oNhf5?v55a-LNdpZl$1j7N;lDLblhIRmUl>g9 z&e%Mo!&dt2A79%AuQtveb zsiqMU`(aQhW2PEC<7 z;#v6KFa5l|)deTOW#EwpZfCeb5B_J>h?J!}9*B@eXl?@*aW=4<8Zp=W(I{(_rx;2OE}=Wl&V>V`&CZzUy1O32r{=R@o7J3V+n)IRtrotZ5x z+*O{9b{}_BP*}t`PxY<$Dobacl(}m652Pq|QBv;1hI}kFef}kvJ>|h_YtWI5M=~Q= z_3Tjdkrj!f!?GhTyCUqhv;~b~Nx%-xdv_V^&}%`wN6xcFWzBhOR|R}Aqb0E$xw16l z?bUcWcVDCfZv{Eg_}o3{68>C00E5=%#JViR%F0e4?z78;Cw3E_dpcOR9f4D37;tNW zufrOM;uhemfiw{e{M+D7N1%OxQ8Ydyqhato!6yBXoIkCtu5YxR268f1tpp*)!6N+# zSv*0aeE>$$;f6_OLK!Gdg2~$>!5W*yBnD#+j*kULqwYxr$xolP)F}uH{CLQRmTD*wpsI;VV*?({_7dA>1~&8xy8r|o;b%{2-{Xd17#TuUJt 
zBES8OS^L;&?qMCWU34bS#)8T^8Bz0T31J^F_X8>j&bR{BRA5@7gItED*!3vqgS)T9 zNs6e9q@***NgSL{jLYrm{!T#3>T%gaaz%CD7G{szCW&uzlsYUAl54bMjxq97J7FNx z4Oo!~G|>DHl*1o;^S?8!ATZO@8O*})2128dLICK_7!<-W{ObYu4Kn1h!kNlDo2r@e z3GoW@igLSnxNuu}{q-dL%O~~U{Q{VCqW{$`I1PW(9X;T#z>Y%V8{kT=jeMzw!`iz@ z+u~d;bg&qAq!;rCx`MS+r0@8MWva7Q8yq9wC_OPFxXzfb1J12-J0X6cpfb zqByxK-e$IR!Bf6Huc)f3-Q{kEWhgF^LN)iC-Rh`V2Y0e=nAHlGIXfK}pLQ%Qe=_d~ zho}BR`P+RzJ8ZP&S#zp~bn~nkMRilyPutL7@UH8*mW&JW{S5L5fakAx;%oxc}i96hHDRzd@FAUMf}7{^Z|5Bk)ce; zt?yI1=m7J`Ap)hek^TXINtUg%u3QUZH<;kBpt3T!h#$F=6jJOlGrSy|?WQrWR`+7h z12KQ)AKBtfYbFZJm>Xv(aTRYPhtHaRh%SNOwNJiBaiY1wjD(yl~2`lsuO(n4z1`7~gM50s5 z`xrqO-nX@N<2uXRPX{NyADm(g1EmaJEWcwhBaOK90H^8#ofdO62JCgvME>1Tr|~=J zafi8h^9nDK-mYH>!xB>629Pie{%$-v9x55gZ+KuBc$jjT`rxw}k2h6szY(iQa{mmB zM!Qn)yNI8&%VN^yG6Uj@&N4ide6gmjlgX*v_aWy|dpbT%CNYs>E%92gPa1AL)G+00 z=;32F8T&`f2FQx&3i_c2`iCH0F!(QCd@ccbp|nCccj z6lv1e<2& zsx!ZP8)RXttPEb!IECNpzbE*kHZGzl=TmrI-29{bsy3QAy8*!f?W&B=14XOvL%xcL zXP)&9s6_&Iq6Ae`xvA!7c=DCrHQl8eNfb;Z(G3zIKrtKw(MAsgNy}#eyd$Mv?9A-FE?l_mEvqG~>^-7kRAgjlWo3qBXJk{f5TWvWzAt^gr*nS#!>My_x6>c5 zUhmiI^?W`bkNd;T-NO=4T2?3+Rv>I!6xPZL-lT3Q$dHFzQa}{#ggl`@^oViu22>FQ z^jV;+0fh@)n3|}1IJ-!KSQuE?&ZMw*4&-$li4b*@cl=P&^EOj)G#vMq^UEvCJ(={A z4KD}K8|BxoF_J}|IR0=k%^FfN-QGy8LM#nRC8_D^HaU?TWq11YR#=b9!XdeIoA+H8 z^gczaGBJo4+zBElOHVy&K7S#qqrsc>{RNdvJtxN-Zvl&PAqJ8(>(8HhRUWSu(?O-% znfuheray;cP$fgCKQ_*weF+JzxnP(0JG926@s;C9n{isl@x{0HYEz1*JyXw?E0(I| zr+V-7beDob9T%UiBP_8-<{vx2EwYyr)9bxW6VkA63EuG-60ue-DAvD9y z8TlK4y{d+PNof~MIS|rXh_9WiEZ9$7M{L%?@J0t7Qis2}?mVPkO)0F@BHD$yn`s}r zNDM>#1nzbxBxIoXm%B7j1&Dp6_>|G{m0swD`75SPKY8<$q9Zn;$wh5L2X86J66?=C zY?Iy6P-M1a#y&7mfB!-MAhXPA**)jYH`5|KY9{IZjEvr3Dw7SCoTbij1hVvrnGL#2 zOW0_KKKo>l7ayZy-V^*{1GH-Pm!kF%@I!c?;rnj~7mZN|8_m1*WQ2QY_Zn&N-#mHv zgca%da?JLZ?~T%5&W#yIx1?<+{dhs-?D=!Qd40?pmWzyJfWklLUz!$qK`DzN?Os*6#$GCr})%J#3LOUvKJ#bNAkLEz9^A z(I#d`X?@!ZtNW~S(S$=g&WiOxGir7ilREEfvYL_|H5fHcYNYodxAOhjmMp=Ci684^ zI`g7#ChO4ju5)Q#G5PGj(N0H7>LGrS0(oqnWi-6Y=5n3m`g$u{nEN@5bql{|MG$=v8{-A-gDbBt{b6cL^$kCIG?ttA@K` 
zHve=bM##Fontp?dzH^3TS|-^ewrQ-@2*`NXGFn6R#!EMS!w@pw5sv(OC9!F@e}9d; zCVyKxgSG(xSk`D6Z8=^(J|5G5qW_SMhA#r~N}|po(v1}wmpyvA6fK@yh+GC4ML5%S z1w-dOhFd;m-G2qika{Jx+Et+ZEYwDcxXwFe?Li;rhD)PNu_^t97c~btHh4F%_oDsu z^y`7cW3G7IUWV_Slq(0R5#6mTR7U-!Q}~eF-MM@>1u;9`;3@n zDD1ZE4s>U#gp)XQfX>JWpTA7`>>OHwFpPWTTiyUB$T}=_g>AYdt;bhn{|kE4^h>fr ziAJoqwKH4niO1fQjj2DgOVw++f1uC3eNoijy1b>pRQOptSk?4rO1OH_1@{{6Ht{l=ei)4*R(^E9>NT<|>epz0{ ziTwK-iFM22&@N*T?*{Nrr8YY+Z*V4oTubjj;#>6VUY_3bnPXE2_@za}-jSlO9x6Hi z0)Vl`u3T|T#4vN^(#EiOv3o6lxvjyJ<6|nW&AzzCOVp&Ss9c@(!>-)Bb}C*f#c$x) z$J%E$hIy&k&H4NuS*1d02|lkYn}1zzUJEZxe)-7xTSqj%9*HI?V&{5!xu6`?QEnLI;Qyn7#&;?Hai{!IW!ZT{W}by5GMTkR16>OX zsd~`vK{Pr$;$3{A_swg7;2{u`TPtOfQ{~TT=uqk&U##+O)wi&{R89CTj!9I% zjaAbfKs*~=bXI3t4-oJV8h+(mseSog^58@IpC>ygeKxO$Fw0%$F*`X`hxul-%ori@ zb&vR|uLlC>MYk{gGWF;JqH14+DQrn((!Xoi$R#%$5b$Eq3Z59%9f`_6nZXusB-Aa_ z?i=%YplyQ7_KG}X>LCTo+ybG`+p{)@E$u{|zY~Zr`+g>4JR!JG;(KgSd+4j4JtQWh zZ|26_Z#`P5HawWF`?mP8a-O)4dRc4Vjaw92=ApDtg|RCdHh02N9<_otC0AuiLPA5O zikeDB(9>5s*itf5-<;Hr;#GN>6834a(yF&;(%ARZSgH0{`OEybg1Vt*6tJs6Rq@;Op!#p13MC84cSKe z(9>4h7<612V@AR3(UO)mnk>d{?8qaUU{%nge0)4oTkc_bvWudTjLcUlvV+g$@vC{pzSad1WW$t^;(jIJqv_fA~RCB$~dAavgp2^0} z+#~lRIS@pakoW@(YhUgdNADz%h@vBkbBZaGZD^zTEB#QsV2VKOknLNUwaly>pM9U|Q83vw zPMF091R=Q7qyWaBl6sR#i8h3C-Hsn`f2l@qCie(V)@^_DFHuxUS^xw#l5+OfBgCSnykb zy&az%t+wXnoW;y1G&x#Z<{xCfdO!b`70s~}%6sG@20gemIOBh$H|r}~(YQ$U0Uwt9p@~XEU zN|XB{Jak_cG%YeDiXOXd|}}#A>)HLb&e$6*3b)ubeQMzqwtEn8!~x zeLH-GH3a(7_~UKYcdi=LoKVMO?3JOVtSM`%BBx<0C#j;KtRRbMZlbzv11=tIexJKk z+B|GLD6>8wz30YW#onqg&%S*bQxP02GgO=B{c9X;4xbZ(*O`^O;ToRrrH7%AJIh17f6CCHDa%M zJ*SRqgAu#(ZqewCUJM{*zFO32RtAc5t%QL{UIw| z1=mPYz|A5N94U5EuM3mi6(U9Fzsyx~Rz`k_k3-#2m|eKSzfn$CQCQ`Pfqv-+WFxWi zeoa|+c%j;hVzhxc4X3?lrK)nV+fyILE7S7QBw4l+9`kCb8=tf*{KWTies&=7#2n8~ zBgyEFY52OJs@GX)Bt_;cKbWVG6|P)J{@qAILhX3Hsp%TWHn#^s=lfU?LVv#G-a#%( zWIDcURc`O$iZ!>!p$&B1!B6h?5AzycSN|7KJbhB>*PX6o?Cunn`Z4<=u%P-q^ju2y zjkL}Klx6a#xq6_Rq>gk91_pu#I?0lvC*{ob6%b!OX*@KCN~!n0axLR=LP^j}TW}!N 
z($Y$+rFNluben*Dgs8oj9%GIHaa?~bW!~e|PgLrstyrTUqGpXDGr|~c3rDrJp2e#= zUk6UkTQ=_2EtMzeKc_w`_N6;oYUA4l23WV6t=TyJyChHOF7kRq2Do+v6y+ZtKW4io zge6W8*Aj>gO^@S;WV^l(50Dn*H3AXoFRv%I6QC#`p&R-}TO1i40t>_d3$*KfW@Qe6 z#Q1VZ!4cv;k>VSSi3ql8=LHJ#RIrZ~=2`GWHO=kUcOUc@ENh`|+rM{O3n`0u#4t!% zB(#I_M&vnCt^?jyL&}uJlcVQT%4-=K+86Nzqia zHwvjQF{P&2c!v8pWU9B@Xe$kSYDgT$++tT|y$^*=&Vml)zXo3a{8z+k?ueGvkQV0S{rwBK8xkY&M|D5|hS5Jz z`2Y5!#e9z^xjgC&b_;G4q# zRR3W?92A1>`7+w(TU_x8HO&@*Lu&`Vg*-mSA3O8~Zj};r2kPFdgt@492ZseLQ=XOO zWO?4LtXzD0UT=hKNa##tj$_~b*H*XRl^L1}bzoS8^y?1#gLPyR?Q8UjHOa>MTK3t% zP*au6uDhMdnBw0 zm_#I}4=A3lSPWh&qXHy=qe!ha66HcCddOsrxUeAT%B8SnHead0qq++UKCLvR@p^A& z**a4<#{B@+@{-PPo(&`0T`hC?wfCn%>G3Y+>K^xop37Bb>x~x)J?Re0M~4=JQ$#S? zxi!v)C4Tf$vHFO=x?Y~cljTaCteyh!f8&itxU~w>MzPBkO;=UzWU^Q|PFrWXcw}|7 zt1&-uOnKOohf8yFPzlacqBKp|s_6q&47Da0zG}|55veC*&{rJ(u zNzwNC*M!^*#Bf?}9JnY?LbiWfVgs@(<^b%&^Ef>CFOQSIUM(1V>=kRAIVAp~AtB5T z3t@Nvy;`OORQ*WTT2l%8{Xya>3pROK8_1JWAQI{B zwd!D^tADlnsagedLm*lAtZ58;QyW{il&%L8X%I(-;DBqetL~&`mOHbc5Vn_-W8^n0 z5db$W4(SEnyml#4znIrNt`+*q*hRn1{~RQpSD9VpARl9Z&7-M$CLa%a#sYtDbFi(& z9-w~Eoj5f(?R{q@{7z`&%w?@?p5@d~0rgdTx_GAZJh{Qh46vJlj(7Cw_O&tSlo$k5 z_z@Yihc3cKx&%LB7yfr9{QKJkV}P3}I4E6V3~p+Pg@idM3S8X~!+@KqEyfcAdIMK@ z`u_Q?Bi)2uH>b3QnyV8`w15A~{~Pc2um2n0e%_S?4EYZ3HaLtM%t@@FZ-bbiOd)p? 
z8ie>3v=VaJ1-;iU$(xQ|swi`hSn3pE5UWG)Ty>XwiIA&qnCn^+5|{yQpQpSPg$J20 ztR#?JKfJX5;6efatJWo=?BwI}6ne^|(+&Q_8SG6gnRN{Ufz;6)Y+6Mc2cD4ToO|-N z@69Fcv;9IvKJogkOA2AtkNoV`ag~cm*f$-s`y;vBJo_gO?vGj?oF|n(eIfT*mZXwO zXHX~DbFtyd+N2cL0QJIRC!TX~fuNj#_^RKRQHr`_ zjYKb3O0TJYM~i{wM~y6c20ngd7P;#iNd;@EAgzqj(MBs{k!TjMLm?aaAKL(AFcA8t zYG=9k-*cF)Gvhk;zsapE+3(5iPF(^az$^9o!u$XxV6i1wv@u$johSPe^0U!1@kMPN zX~xP5g+aoz3?x3_?|>=8^_wo-a0 zx%|EHLY3jK;n5FIYV~_$QRZmo*}o;`R2P2uX+0!I7yA(6e^;{tIq0I4Y0Q$u(~3>Q z%LSEn&$7B_h?*b!dGRyrL`Z$~GrjL-^<1QTe>gj*UeG&JVSv}?I+pJD{I&nVR4^i9 z5{93D8d3k=&bnAfBr((xh!_qaBLs@98!|QfWB2|Ydkzp<0G|W$$sFw>qbY^b+yQF* zbF7}3en()UQ$x6vt6)xk=X!t`AyX5T&uxYhIR};mt&~puxe8k^K5)VJnpwgT8O2pHJ>_)@V=t#CQSDodGGb*(te&ZiGj9y1I)7Q)?m?-~ z?5Hck2z6_(8u6z4ym4KVMoPRcIDq1#;qf&YO z+H(x4uYA(sQ^m0Fa=-Gne3jg1A^JTQ65ZaM%%C+|y;YMdwM58<$NQ=>_{e{G4e9St z%?!{CeWd;NFbFZY{?(g^bA}*S1gejQ;RYy@Q0{Q&AQ-tn-1%nBtQWtWD=J_$D`crl z*m38dc+c{Gc)icoh&2(xm#f%dlBM4e6kAl?%dIb%korQKTEB;)_k*HPi6S@?l_CNY za*i@TYb%jrQHsmbVAHC6FE|iyn{}SS^q8P|kcQB(W^LLU_W5GCRFU}BL0dc74}La_ zWhWH|(&`&UFn0t=`y#|VH3!WugKo~by8~6McLZPOIlcTGS1!g)=EhDsV5}Vfqv%aw;&f>Jo6x~Oa?h`fbtr1bR=g4E;dVWxQY|mk{(h9%KuIrQWpL%NM zKhFJZ@^da0<$1}~Lqk=!)SCUE?tr`$jU3X-3-6PDjlQre6n9o zOZ1s`kox=xu+47zVKK8v)^IDUiS~J`L9D#3D zZ)X?CkGA`ya`+gdeK(9qeWjDgPHEy(gzq-EVsA(qdI?}xDQe7k2KLH}q`nD0>Fj~e zgRj8|?bU7BezB7WpC&=Dbu|b^WQ3r-*?r{d!vj~Xq?{%S?=uJNF3Jms1V(}G(h+YZ z_y-Vj#zHhtrP`u#pIKycXsjMyt$G%2-@u9;wW?o=_dBS3`49JyUBv`J?g<}3z$2Gp zZCb1E6PO$GiG$=P?y9yuT26|brL!tmGv6oreJ1oI6Y&7k^QWhuKUK=_FsN2=^bC)# z_R;Wt(8LS|GG)bEC0Rd4oO<0~9?r*+=m+DlTdMjdoHul;l^`}TP@{_`0;X)vA%|(h+h*qls@Op-81f_J3DKK5KVl5IS<(< zhCTb`oi%5!&O@)si@+?=$=o4^`Y$gQMC&CY{J)F&kCr-^QDCHqv4rO`cy-)O5rU(o zDaZ&Ql^H)>GghKSLyd75)LoJRNrzxFUT=pslY zt`5%5WZ&-jvB6LjZ`foV3G|UI37RkVTIF6HCTbQ`oY&UE0}$vL6jjU z)e8yRuzIc>N=%A9)yrK=1{vyYWjkMAH4IIZ}#9Wd%GfskBZ`1X|SUz1Ab zRj;OfP_uZH=OIOm^0jkF<=E6i`+Hj=XZTJkr`>Yd%Z>K^77~FOxo@W4(`Td_k=)A2 zO@Q!VLgaQVmI#ZOP@LEf2f^1_NaZ|YdgL}D?}e@uft>6vdrAdoZ--Ha$)UTsJ;nnq 
zCy7$fF#YX#`d8(rAuB1PBFpXY?|w3Y68yot#>HXvFc3y;k5zEA0z;`e+QQz$9Fg?@ zE5Hk)rr~|}pQ{+Da{jr=|JeU9J@N+G9$eqf2>r{#1F~vAZn)}q)UZH^(!`YEK%C3GVFpzhS}#Dd%}JzZ<;VR}i@d<4a_*s;U_i8|4jcRVHz z63LY{eu(n_VWu7utCA4-o|yNvX0x5XF6|THBH!Gz83q|2iUz|+Kj;DxCK(=r*%w6f zy2e(HNgMc?Fjv+lN@qNYp)d0ljBwy*&wJ8i*xx4*9;-oP7tc|93vmPh=4x)oiR3Yy z*CY1bjPAJaL%xmBZfe{Eo0HaO{qGV61R}`}Q$#!zjR6fSY1hvD(4}wsNPA2fKTksF zc)J$F-dEf_(8}%_+_v~m>pyg{*efcahs>I|}1MWHqd$IVs+X6ZjSFmyR_G*UQhECMWWwT+_K;ibbW~*EMWWPoAkS8?C_3R7jbf=T=uPdcUg<=Fh7>II|9LsT`?O!~8U{t`C zt>e0>H`sdan3J;zt2I@)PcFeS&FrkI;M#eM;iF?BW>j|;Rg)1RQ zJGuey-|IOxs{eofy`OsnF*l*_j4z3I-7JPS7VWMGceXXc9Rpxl&ELl3&&znmQF&_K0_ov34xB=# zhW99PryhCdxcAyIsq&hHB1e(=aGx271G%q!$&S0+F)iCJ-KEU}%HR*E9Frr`)^XZ8 z)PbskAI(NJgH;uKJc;8MiDel(N()H1Ce03Y5Lj;dJ-Lc9859^xGUKxld3#=7{&Uij zjpqG_>Mb*bi3pUiqJ_SirDFKkc98`6Cy)K$zrnVK%I*H9`H%Gd-_@BN&eh!86X)iP z_O!J11l9V#ml>HZ?P@S|ck}VKgp5`>Cuz`Cu7u|3@HA@9!H%#|Cr$Ew9D{(jRZkUc?Xa8m1WO$EPIlbJQ>_k;Jnw*h60fRlQ{;i|FF7hsxXF0yHGI-YwYw|%${fO0Tpb}K z1#ts-{g1-*k9^yVq+-8c#3#;x7WkMu7|3PS^P`kjAMQj+Azry{M>Gf-Gg8NzDGfuU zGzf|_u#CBFjAlPpsmonu1CFWYuHV>^dVZ)hlcTUInmmGj&TjX$E08>ZT&2ICllg+f&oM@kGl*(=8d#0=;%mkxA2dSW*#qK66Vq)cn9 zL8D(yUe8=1=;Kp*kP>&-B~|pW$6-M6XalM~CwI61-K>8o z;(4h3ZYhq*jOh}cEMz8U_PvDgEzmu85F|r~J94#dZ~1Lj9%LMyiD)?3Q7uEnbFR$p z9@#Gf^y1PNs^NwK--wN?$CE{XGMc1lZ62%~%7xW;uq@~3OXUi;S2mstf0|q7p8z22 z`>m~Z$}gtRzmu9emT2SOeoU-%>~)k5R#@X5``Bl-+y^I1ZKG|CNyKtg`yt>GCQ!$- zr!W3b7EklpF6pxt5$c}qSEiRdzY z8j6hO5QF@#!(B@x3)mI3dP309`|Xd%_nh#;o`x$VfYo0Rr>Ub>+8ZU7`2^gnV0qrD z`%^Ec2(Q#2uP{Bp@gCcu@_`rJ%lk#TS>cH?Ne=xRkyl+#Q_Kq#@<8I}GnJ$mVzQ!h zW-CE&TASm}qwTc&Ugj$6v$FDiW$67>(Y+#uE_sPlvEFMtyr!Wod#6+Fv>^SOfmcpr zQ5kb-B`4hE#T6C zlj;99jG4Vf$X9$nB#fo5l>zU{3YdvQ{HlD$;rKYz&*dZdfm||%q6gZPD*_)M<&Yho zuo|qFJHgAcaN5i~-I&#=tX&-ZzViaaEG8N#yYb+3>tMhu*%^nyaN^o6E8107xrw z{gn|+zS~+D=GkZ=3aYzI5tDg2%(JM6;=EztLeaeggc5YsOJrT-7j{@*x-V}Se;%d^ zh3gQn2HSNS=LSVS=uxn4R)}W}{7n|_D0ulpVes$wm|0kW#WaLe9^tvp67n17nD~Al ze%loLy{W$EgtUuJ%SdbhCfsT~hA8Pmx3BIuOdzF>`3nsiY3frBLvGJcqlb2k0Y=6r 
z%r9o&y`LGA)rWPs{H>njBWX@ivp|l2{2PU2mC)#Qd`8iy%`l+FbsbBP6Kz!$4j!QM z9TRNPDGw-p6BZKAn_`gBs^R&y#q)EAQgJH9OlAfs*uc)k07j(q<6d)k#)q=dnN@dmBwjKx}d z%Om#PKQFW}=@c{5>#~X1$m}jKZs9L9FmC-1S_G*&KoNi#lZW6zm0-`6i$14+y}qMj zN%6#5A=3-0n;&0|rkUP6+tvV3{oN?8S%8qmevsjyK0fcGc%5FCA8q zWScr_tsaV+sgpVqPaUelYf~648Ydd+bR?&YQ&TDKC(nh`XPc0~6Sv{gMFHb)%1Ud` zhC+^2soJ6rY`GVfy|c@#`P5@(5o*bK?@7OdgeRD|W>a)YQByWa?SyCg0Mt?DZQASJ z8}g<;xU=t^fw(%~cxK&zpE<74hf|N&n~4k|=jFRWnaA{tbWy;;TSti}qUr;w1 zsL%%aYO7REDav2U@|k7lv5R+B%tdT8T&|lhyQ*BJAJ>%_yTbnNq;DV2naH2LAbc&M z@3H2*yxN_8qyQYmkY=z}B;%c2pS1VGv^)3ltqE8c5OM83DS>I9iw6eWH!2WpWsgu( z@vWdg-i=|Jht1#JIAR{j8fsyvIBgbr2T_+3lOkM~qNO>d6{}x(-(S+r!?`>S{7%CJ z6?L^FAF0WX2K~@s*S~Vx#rWpR4=x%Pl5>STmB%QaX$U9!m_uTD8Ccj1RkGqjdZS{bg^S z3a5EEAuS&Lr43J`&#zMv^cS{rXMzgzW=Mx5c$W80oHUTY`}5Mp^SP# z@a0og-J%P@^=JXN0*ags$<_cZwx0Ld{fhn4qL1@^XLpJmU+(hWTCVl#sRVznYkC&G z0xi=vw>~ZjRMdw&_#9YYMyrj^YJ#jQRnSy?9M|2S;OwbYc8t%zM*GEaCtm{wWuB9a z-1%KD551JTswfXy>$V6GIU*gkFJYpuA|^0f+a1VMjv59; z_?G5Qu0GKGSB5Dmgdd{hF&@@f6od^!#4zfz6$+v?AvMw)r7W+7S3IEMH;A)K8Jr{- zR|v7UN38d*|0y{C`tWzNgEt3^N71ehURZdn{;oJ}n|Mg%-fLGo&X%qI){65tO59NQt_61X7yr(ckL|I zx`4w5DMe{)&6`odE{|}VCXut@8akz#7Guy@cRcFTO6k7@{8bF;9Tw2WB7ltkE22|= zj{wZm(3Sukxn`TFr0Mjsv+B?8{V1e5odgo*qg+b?~*D<_zZYU>LE@o0i=mjvAJ~5J2~e zaqN`#=D7cs||c*&lf`8^I3epB;<1QaQe#G`46w2 z&uO@6_Z?tK-SqChz#APkkmOk9LAQQkn*6}MaUS2Nw`@2~Ts1ia8rt6u&4mP)X?Q$> zG{>h@?J_;{MeAAq9LfniM=oZA%vN{G;mCB_!TtOdoyU8;R&Ugk3W+p~9~WP}ekTCw zOv%vuDE$-N1z4q~f?#NX|GjVL;SLxA3pX^*$qi*`iTnd%qb+{-!2fsfek2TgHVMhb z*Pn&Y>^FlnLs$7JUPA?k;iDOK8%|sE z5L;Se?H<4`D=rb%us7TFMmz=P8`C&Ne75CntNP4Ew`I7TOZpD`QeLxo0e+ULsVM z1`PhlrxrYU3*jKVUyhyY6h_QY++1fQHsU#iC|*7lxp$#+#4e*T^u@4cMrXPN%c|Ya zyF6XrcyF;3-%j&iYtl014Y2IEdE7uf;PE~>j24RhoRG1wVpJpn@qM1;s4^p~u`(`S zBuD49s-blqU6|u6q1qiu#f7lNFcA`;1>Z-QLRIqz3(TMVXJ3=6CKS?LS2oV?FX-$@ zNT6j__F1zYnSoT^*ffXbTd@vODI#*+`?e>)8%s>YvRq|$xwgpIw$0f+{EgP>YotCb z5+i(AySBzvWmUDMa85h6L@ZicO3e+W2G~Soi6jv{jVz+4`4~OZJ74$xoru&`4v8~P@K@wPIV&0NX%g%QU42jZS?$$mEP*8 
zz-f>=N*4xUmzEwfXUjH|#Hduv#LP?0ng2X1WN^0A`@xKy;eDD<*=H^}-jz1=PPXR6dN}fmbwvD*Q-G?RJqndb&BGWwf+^ zTgJ@ooj`1ehcLsX4WcARQ@Cj@O#$)EEBs&T4MLyT^&Zf7a`Se(q>hFdC<_~iGJTsdTn88QBC56hG&GHzHV|cSHjT^tEvO6a`dL}&Nvd$FTR^^98BJruqx+!X|HIg zMNq8d$$0i7gfvcbh`%7%_r_CjwrsynVFm(jxl%8u$eOW7*7zaUxmxoA8m))9g|iwH z%nzkrsVT>;B{@MZXufwA%HZBM&x|ozmf2uMK){ElsyRwVI%DSNnP$(6Pwd9}k`*6; zw~=kmnJ09a!2AOkEX2mw3r8!|UJuZ#Q2mS(XQ#HisU(ov9(>ZqtqwYMTT2+BLykMQ zfN$On<66SK=R=Va7{I**Qj)u@IGD_wY(Z+{fR?_jqacIgJ%@7l!Y4h$pb!NVRSP>5 znB$S92VFQ4J1eG&#F4+$9wl#)*wAl531|s~96x6wx)#$EEjD~GZ;hBPs&|6wl*1vZ z6sydQ@#^()CvjDF8Hw*F6=nz$&Kx>!4w;v23dU$ckd0LwfDvVr?g7H7fDiu%8Jl(OFux zOX*!Ny&4h)BXl{m`??ral5Ym8$WAM--ruhoByzO$N<-@j`Cns)Pm)CCnqBi+mX3LM zT?#K#m>eIT0YS&}IEe%yuGi}c3E^FYPYut#`Joss%LG2!<8o9F z**ufAANLWa5_vim2@O$?#Vabsp}Rx&^YVv=LazludcaOo0jyQchmq_L9(H*-g63mw zIkNhKI*Ljn3zCe%q#oJdtH(B%o9T|X#2r8~LBDb?*S2h^20cvs(H74^@`BmeT;KuBP_b{4F$PIw2m8~XR}Y(W|d zuMaGec=!hmV7o38p3*%{C+n!{6U5FS`H#?{vg?y;FB_4)88oMVSiwWvP5lp4MWIQB`#~c zv`Uxh>Ct_0hip1$J@w|j7f*zE69)#Ldw*Z{+^VAJ75*GA16r~Bi<{DDgXuc<{^amL z@#$>cy9sl*_P!+G@vYIm=CWp&!gyNJHCDnRw(;VLOCp%%z05*P;dBq@W45QhYhGq2 znj&LL*R27ac%z%=7NMqVq|fiYv@6iV0U~+5dHK(w;hxa+aJ5FH^!Q`z|E`PaX#1DX z+=DMZ7)yM{pRmu9YX>nN+2e*c8+3hYtWMrW(u~bRcXFkG?Jd$h{Wkt5U*nSI`P{d4 zN|}R#m104sh+UT3iVH`i=%P3>;-B`(<%GvL6+NpgDm|MeWFXl6J6uY|+e_~egiBGs zshHuHnsOZJR}m9v%zyv(g2!b8+FngTYT5l{m9n<{FXx@N5;+k_8utz)t!0bm5t4Gh z0D4*M1R0=+R>LCNSi|i63y?HyaOgTL4T88G162m&KYjho6^q%${p{SkY3nFI&IwTCzb~Vd;bGrt@wvrc5 zM#SdoysgNY95}X99MadT%`Xk~swz=lNl3fcTYmTRpcy)=N5ox4qD-DYOhYL7VR_;D ztjjsB6dtmR+0e!unZVvzN!!=lQ+bCwR(SQcx*tR$-}i$8FXM>{^O$IN~ucEVpLJWgf3 zUj$~#JFzsJt0ZE!;dmE1!u~|EKy-P%2cN4`_waYu6Szlgy|ik1zjt~o#-ugq309e6OTHLV5tW#Owny} z)B;#3ugh*Ep{T^8c32=MGbO4lFh5t$AoWCEK99x^-U9y~bPxrv93kZ#M5xMFqpnua z-uyLG#gEPXwNLEZ484W?@Qi5PFjC<#B$Lwfjy~Uhu>|81;eZN%qA;gmL;?L*p$$ln zz{3iR5jXT7zs0{PE`hWLZRrG$dO+eK6l+jgW4!)T{Qnha<%qGxxH&^biRVB5}RjfmSNGQV=aU#Mz$J#nnp?XAm%a0a=DYkg?Zd3E|j@&otlDHpZoMf;9(+NE$6 z6gGW;2C^LM!M5X54lh)YSkfSuP$#XPewKtjK`mQN9$Wh(R7r0AZYHU>UF|phwdEAW 
z2kcBosT-pMrwAY-QQ@kriLk_2Ym}3`*Ay35O?B*ibPWZKh~W={lJl>@^eLJolbb*3*B&x?#zqpCYHFyHul1GB*^ zLZ@Md$>IX%7{4ASP!u@M5jIcl?OBu@Y*{)vAIiQ`GoM1u0z-pTfj?>cX$Ty{kV-SPY9{rJzHvAa z_kKgFCsKTWc737}-Sq*a+xJg0DZW3|^zrbO_TYfuYqIM|(+VJ=4p zNJL#7w#DHzcycp2JK0~}`C&o=mMZ>9aimEcC$A>`AvTQ+ZZaD(Sr4wTT%|`Hqgvrl zzR>qDv9c4lG4wjUxZFE!Vspo$-7UxAX}z4GrQ*4L24zRSrSW z$(tG^MI;Bl0c$V#Lj=Xwc*LHl*~nj;yO3Ywr!f&7aDqm}w2`31n;lX!5@#@GHy-WG zh_!KQLFZXe6f7|D=S;W))PWR<~^y_eFy@7A!~^(dB-tf$?l;-eIA=e$qR^!3}P zMsV_W+}9*86@!E-lhVlI4sjA0|MR2|B{@`r_ZviAx!4=&fIjd2x`yzm!y|rK#$Q*_ zFH&FbQpb0|o)c2^sS*AqNMtk9>5BWdjski?qYC4A$mTr38C0w&H zzf6vlG_enJ-~`%t0H0r%IqZiUe|7C1nW^v7eVKRd z+`hpJFJ0x_Kc*|A@AZ84m8Re1OFvVoW1BJdnq%4jGl0-aOt#~LSmLDgi0>ByMn3mz zZPtd|zSz`;=nr%Ue$~lPWYvji;DaF4;QaAlhKx3qt?d_}JutC5n&{-{9|jU4!c(v= zyP8)dogromYYD|R4rGvs))5D%yav`vNfOyV{|I$Ehw=}Hdtp0BpUG25kSi(J?|9bk z1`#1OUuK7C%I9mZ5DDr_x}ZWNIZ{iY?x>B?)3t1LRHojUgjMT5C@ z{Nk%=6NCP$v~OGEn|Vv3!8sJ8-%&J%9GjOCtKXK~9zHg(r*#zu5MADeEAE1aicdE7 zl(P8vv@FRb5=~CLO*&9ViNuSSixGm_xpZo6NP_%V6~oX5ZO_H%Pq0pGuui-@yA*zM z2wTe1?J~^s?67DH_=mHNGs?ovb4L#Jr>E87=Xdl#*=q-3cxQ?g$tL5cY+U^rS=}EQmsH6ZB5}wDH|qo%0o7HaD(1!&(`ho4}A1{8M0|-##U~~g4 z7t`OC%gY-~Q4ZkrgH|@sexS7Dmr=8Ea(99;0Al+4M@TQg@2Mz0-ax2VC@(!Fl5kg~ zhz&ulPh6lQgLpBPu}<2~txo7A=-*m+YIy571(Jf~alwHR%V;H%9^&meUR7A<7BZ8Y zILP5#7;U8<5`g$H0;xk(2Gkxq2Y0G!I~5Js7OZ>dKVcj8hy)#<8##p&$GeEsb=~UM zDMeP}C9d~fsHwvHQXgod2DA-1nzFkr(4krZD%Z3pdwsrj=<_EXK_E8wfcX4i9krRN zLAljNubkY_DvyWQ=xYYq+|W5de)Wp;XWBZNAXq5cZQjWjKaoM`s&DiHEEF>E+jXYy z1_zQG#vPjO8mh9ATK_qduG~#gsIj;)^z7mobBhaw67IKCbo6=^^3@R~WF$kZ=6Y&m zYzPO*Gg!tBPn_jQ*Jjllb$U$W(V*Dm zW|#WA6Sv>ZDVaRc4XUv=B-X>I&VLx0#47a02Sc`nyy<+!G(OwHY$w}-j$jf<^qrV5 zB)Xm3jNd>g7A|>Lf>8HkKrt-KVOW-3m2*BeNFp5!G4xdMn5aKH0?JcAHVKx7w77WPbq>hCSr|P)Mw@IN`a|2HU+0O@dvp5Nu5(*XFCqdP*%#Nc~5P3a}O1$u@Z>5L;06_}sws}(1t=On_ z_55tTcq;n7WGF|9PK{Ink8TIL?YujU{;sp%{gezlTH}Ij=iNYd2zMV_E zSD%k@^GkjlIQ-p#JXxe_LC>a1d9{hvC6Mcj$|wIOIg1C_cutwX&RoDc?v-UVv3N^A zfRiATXu`CFJDN@n4(#9Je+H6F%J6>cO 
zx-O_pcAE11Vizk!qK4mm#wwECGb0{$@ea`~@cgR<_wZbh+1@UIC=MXCZtXd!8ibVL zTJWQAX_wViQP5OFL#x6WxR~~sE8c(ng@OG58M(RP1F$geNPdu$D_DLJ>ert>K;+Ng zRa}pCcd$UaJ1DD5%lzjUQ8ph8twAXX#aGG)xWL^_v2%AXRiOX$n~$~Vzv5k;WPeJX zlxRt~S=YE}RO!8^JC-P&VZG{rdWhzw8-e|LAt%i5Mbp?lulEetyxy*85r3A}UL^|& zpARCZDByVKiGb7pBn!*5F>&!u|!V$YGunGON3ABsI>P(+L;x-H#EAfN(H)0XXmSY!CNZ99lp zTQoSDczN~9b-)hcZ=hX4CXVi~oDjX|h*rWl!=u;^2Z9qPURj zRB{W|&T`MR?9_A@1@0NT2Orh?hx$7sQVa~0#x9m%OLs+2KrsLB;qoMN(-9S$gcTgS;9V^aR4!HkAMptvT*V2fyV3cyVQw{ z2%>-uj@W9kYEu~yC&hx!1dZrSu8Q?ZU)a;R7C3Q~;*R27X;QAR{H9CxhwrPsKe9$U zyfiytgHszY46HiN2nrJnA4S3QD~~+Voi=4alGJpNj<)}8wmgl(5gObhtW&Vd!rO>G zf%mn&#_P!q1s!<>B%D(=&yOlYR&oFO<|DyQFEaKc;&&a=rklFso+1hp!I3&{`r%jl6uSSG!#aXC9(6yV!&%BBUy z0`90Z&Y<8$_T;ll-^IRfo`}n3-2B{2~=i+?o&LcJz zu4}m3ZC->Uef%Myu1?N{Bv=x#UUvMLBTk2D945oMidcBv=zQV3C3(!37*rffv20c; zh;UT&5;uZ4OCc)gtQC~fHdxAh`@$8Uikc}nJK~ZyUnM>nW7I07Y{(R!z+6`C_)K=> zqd~qXs>-VHwjE2V0Y_-^!w%Q(4wYgNg9amlF;+`R6Bmg*c9BbN*Ebws?Ntx~_U~W+^BE0A zr0#up!&pC~9%O#3AoE9Le$37~bdp)02s+JI>L^mnhxNWCuo3(MH)%o=NPCkoYKbZ6a+h@3=84HUAa!e9DRxR?5I_ z!(&mz(WikWIJxYO>p*EX?-60uBdzPM1)E(o{f11bwVHW!GP%UEpF8Gsm#JJzZm|&X z`9If@U;!7!I8{A9hO0ZGE6Ki)gWSikyN!tZ*j_)kFTNfiYvJD5&T~BTqm;LB@r3C428J6|%|R>&A_n`*zzaMD~pA z)e?p5nY|)JMj06?Gdn^>`Mp24^ZlN4zCZn;b54)P>5r@T=RL0L^?JT+kswtA%E*(% zd|@XpM}sCd5aRa1xJ7}DYJIrNjXd@#2+ttOQY9n76Wk;^Vd9eE;|_+^+^$Q7CAx;0rV=BU<*t~X zh#_BYj3YkmM5oDi?n68< ztB;3?zvkI#DEOijai6Ypa$IWvNUQgr9Bx7TIG~oPV}V)EbTMhDL;WOX*0YJ3^-PRL zCxP_?jdI0APuYP{z}yH!x_}!mxL#qN;kF=39@uArkI>x(BV7L{Q81o~%Vi!gCNy&@ zY`H1`^&<={C>b}A|9EDzkH7x@<_71&XSxD*BGcGU)A(7nk82R0)XOQ}kbQnF60xFN zTW|ezH!jgO>Veogx!Ps>g%%jgg)wEd=Pn%Iv|cAuDluq0Gvj-@xlnaMDT?sonGb5s zUQ@Cky~1iVqBp;hU4K;BOk17O*P!L>sr86jj6OkuRDkxdxtOxK#HW?EJ_eV-3CnuP z-?_ysw@&t&XejcTYkMuLa#^BV2mLwOTCUq9&U5$gxE)g{&-_rUFdx1r<>tShhk-- zeAZ&m)v+}pY3-n&oNj&Q)6s>aUQsFj@1@CZ?0)aYM1@R-8Y8KKJYB~r+GxYF$qt_V z=K9n+9|E3~=eiSGK!qUN!>Mm4n2m?9vyd>DP%GSmnmB;70RTJDaAPVlQ}WJ4V+>9? 
z%ql)U+4wB^Bn5X^z5Mu97ckpc|G@uH^kdUO^R+6-#J=vDRDLU8CQ`xS9i7A(dy$IAWV3t>UgG>_%E_W1B9TG12l7!=UEKWD zhmHxIDYNICH40aixbn>3_l{+C)RwR!r(1;LJ4!vsQQ4d=Zw^|j`NH#T)o@f+fbD_G z*P`S50E76PO!C#9gu%|zeHDnL5~5HVD_c;&K*C@ z=!>IZQiJk7UIzVupB3GmFflqJnC7KRU`om_Wz%4AQRedGUC}1Lsjo1OpFaGe%MGbD z_jL(=o1R(5RSQee`@yhViTn}9Jn*1a9c{)^LDyZ%(A8oc6{#eiyAV;VqbgdI-pox| z0$by8Vx#ASkzFq+t6QBqX6ac(;3$bSR+S_(eYrb4C@$U6J0_w5XE;==-^NV$Sq`KQN$K#i?^d zM^EsYbOX>b0r|gT;{Tybe#6>c*c?hMW4NA_Em@H-s{Bq?62I(5iq& z^;D*kP+-Dvm6;Fx(*9gM(JripjFEBcp(1kvnA4k5J&{0)=)-Q(JtZz&j`37z=+G^q zO>zMk4SRArOTAg}oNHK^3{9JXq7}e?XnJF80qm#WI1Nq)MJlp^ z^oU}{dbK;3i$_W$Li~1{w>G}~;SIyH!sRRzE77zS2Lq7YDyPh(@_ADA{0Z7Gv0Io>*T!j+k;<<>JM!ESUk&Ur&MIT zpO@&f=F=)qjg(GF6}vgA9&kn$Xa|^V`u*0MU&)BUBJgN70Dvzrfalh()RSzK?n$!Z zdfA)bwqU`Kj1Nir0pM$7j-fMj@7NZKvo^a>M+)x?1$8+-z8PavW$vD=H5ps`^Nz*a zLQovY|9I-_9}Wq9;a(^KKvx29+rPBeVA6Uswb+-gV}ktxG?C~Y#DcVuDv?J zi$js5x)1Y38^l(PA9$8zxjd{&p;3!#$`1h0jBl?YXa;zcHhVn3$DvwVHE`3bWP`TI z00j@v9Z`GPQUZs_RZM|?RwN=qOW2%mHY|_S0HD>MydC&O@`b}6d=vsEn>d_QW|H7y zd|co$dPIUFB&&IRXPk+kj61ac4vuP5h0QDNDeyd%_L-TaMZ9?ge794>M(kX8s1;)C3~oX|6^Q^%E3jt3;g$IJ7SrpyUQiSW9;9886+jO;h|xoGd`kQ; zMiXEYfD%ZBP0NkU@a$5PX)u@jM8se|#lkQr_0qzu`J?)3H<*+3gMl40QH@2c7w6Wx)y$P^H* zk-Vjw8SH)*^o$tqfFPz%gD2q)PLR|4{DasDP#k$soKrPtDloJO&~pnyj1Xg31b#S;v~Pvj{@^nM*vx{WN@bMyt+@1|)SMjLtdt5hKQjk4RPs9*BNO|2ia`4dleHGCOz2jzW6Ubov5aI@p&l3wWF}a-K_t%b*FU< zp;h{BD|t&W!y_*2^TR!Qdo$a9nxi+>lh>*DhZa`0t;ypO&(M-;YCOnd)7EPB{PwX? 
zn^Z+0=GWEUd-s2PUr^Ixs@iz~U9`a<@brteniIG>`T&iYKR5z0)dJ-M(1rhkssD}c zj^}W*+)v;m5-(S#7?0OXmIkAVN2?d>DI_|9N1Fh8*8X7Hl3;RJ3ZhJc$LT0~*K29b*l)3WmP+5YHkFdZf2ZZ@~gS%4^sig86BENAE3CAs2Un`(u+b0+Gjvux#c|_Li z=yRq>T<4o~SrC^ic`~yGc4FZ!+o8)|Q`pxcD9dknVR(zPMi@dOWlTqg%&+^lw@Utg z$oJhk%F63U;}6o_JB`>5APM3i6*7vbfuB>OL!5vG(#{0}Hrl!)Tmfto!|wh|WC~!8 zKw5;b1;{d(-T%lz&VWDh^dl4czd*{YgAHZ``%l}K$?x}4l>)YAK%HfG!pHMzMgzx} zVZH^)n}gW{5GU)>f$uN_aE2mi9USAH-Pz?(D`t`^Gp^u`yw_dtZ}!NmXGSMYus&@& zbIbT)xwLKJ#nsAQxDgmKu{d>sUhu5#Pr%@k+9X1`?Qb11W{6>$<8)C7v1_X;Jh z?R>63C9x-2qnkRD)PbHFPJcI2L0iCjUH;?dWZiK;V_GCJ<^3P;7lv;gskOghmt=Q# zXW|`Nx(*RQ%J7`28DAV*?Sd)C_}%(DXDABp>VowMwmz&=dz7FG2qzbBX8{eEy8~Pg zg3;XcMkBrczgaN8Ct>Qs)?`KKRoa|rTb0kRGtWK-Lo&%6%#h4N4$*QT@=(G`nM`jR zMsE1+&8|)Uw~)iVd{4I&mV#|*6nY0c`I&Mg&J5bkSgK2v(uPRg8fL(3YS%|aE4qGt zOH>vCA85~Nc(K+U+=5$gGmv^2XZ3S>?eH1&NnD>Xv1vx0*so*4p4GNk___)ErtONQ7 zAJ`BfrSd=EDZs3&3g9XCy6v#kgA1>+LJ7ry?dA-I@WK`Y_m)O)qg!TM&>i^WjV9DW zf&#->-x4CgLsbb=`3aWWddcE?i zp+);!V17nHN#hs)rPi#+E!)BT^PFFvLCt7t?P3e^xywfBdP79H&cqi}9y90LYIM1r zE&SP*?wJwM)X6}|<^47&kA7TG?p6B`$)?85Zwyv~L)Y7d_VvFH;qa5F`MPfJ;OM^H zrkp#Dxx?B-OW*YifB$Zr6IfRCpfaaEk-@UE1=;7Ijfoa;aP@)?Q!sY@&!W%0vsaCf=dvGNG|(rlD+rR1_Y%1zVc!!j7-Es1+Pu| z!9ffC5g|`E^JTqh*@^PT_+BNuWr-8`ole6A+x-P1HSWN~XQAtZYP9b&YzRP&;u#e$XjHC_3h&iuT6ABlgTGwDDe(iNF4uUb+a+TAe7egzd!p2n z34P8Ya=sgM;mw|Ch(v!7l#0lnITnBvku8_`A9}qAp8|DI1wV3EPw78_P6}XhfDxUCqpHHc z_IF1wz^4ZLl8Y|f!wdPp7%b@1JXP22Y-{8IGLXG>o9WiWOH>})pSnb|qjiU|0s??YS(mho4SI{c>TGdSQe!^~FpNv)A4yDxkG%NLm`{ve{wB=T3@C4@z z*02JXx!3hei&}V9nVplhY|{RnNQZ13gJv=M<5a_l%zB>SX3Hj%t7h{S+)$1(2U{YbUz#iqjIU}u(v$HR|JhfA$Bk2`Q zt{Sdh@4$5~7m```e*Vz4+P<9DS!9(eQdu$9_z^Fw8Q|@hrA-Aq5qn#4q%+#GHC6Zp z@AK61|crFdT5vQLS5W`#2B=G7JCRPFl*2Z>I{^DThh-jq9xRkVC zewWCz%L^|PdT~myMZu7+QOcL9-ud_O^Z)qg%Rhw+fpSQJa>&Y@vhwsD0WjY~9$djd z!mV8-fYcgqwy z4jpp{l!0dC973FB;Bfc&PU{cJstdp7NHs*1ML7n3%=Pl2Cc~q*EHVVIS|6;ss@|H| zNl*R~=sr-Tx*+p0dx?y|q#d*$Z2Zm28GG%d2y~Ixk*w>uj;YHnXH5*WKb~<6e#0tn zs@qc2FIjqsy}Tfw2CJFjnncA{5|)&#yA*T7;te`JTM%89NAWP*6|A5vkKt2(ndf1x 
z(j&|2Uvo=LKKrJU+Abdrr*Iid!$b!aZ%JkdcY)j4%Q-9hV`Y{A!LG6^zh*kY?aT}p z+|HO8vcT{TTL2VLTI!SndN|sG5xW2y67HU`;{g^Oh4O-ecK`C-fwut^8)XK4dirG@ zXw+_bJjLMfam<|9WhuP&&)g7w?!_~1K!bUR%Kw%5esKLw6KDads+@F3Z4FaQ@0I3A zQw)mA%&R?tMOLp;|FWNIkV_w!&(Fnfe?&@$c)eMr{6tZ~K3@CPS&AmOUjN}GX8qit zb#~^3wg)tNaRypS@oeK$`7hQO(9-soiq6x-=ltjjPs%yKa;Ntv@Ry=SBqW zi+EYNO}=$p^o4LRxPf*^(E>f-Q#pQrEYee&X4z-O-FRD*!!=RXiO51FZFL1e7iE8L z$vww?N*eNeb1qW#j@enZx?5YgS>1oQPe9}IQ@dNPphp21AnpP-aMWMcf&V%=JK{8h z=OF0E1b}|S--l7I>@V?}J8IUCb{|Sb%ZNm6Nk$FI_l`K?9G|N!D zQ*sXbcGcE?ft-pH{5`Bh+NpbxwvX~on6edPdZ$z74i@=OcfJLcL+WB@rH(02bCCu|lB1?h7Ulz|jQs zTe?8C@V^{_iG*bF*-D#8!rP*n1-1F2(d(+%kUw$mDds(HLj_LCU5po4L#SSmkVkem zhZg{?(=8@7@u@q$Gb)Yy(SCv*em|2^5o2jG4mE^V4_`hDCnEBV*mG1^TIl z23`UnM@A$%DLuvKpkL5@C(2_-PmZPhy*)=o$(VDVh5|7vOKUH}laHsb>Pw;1A#@wH z`n`Bpiatu{(skIa<wNge?yZuWFLS*O8| z<2yhyX^6P6j5cJ5yaY-Eu2@*6CZ~LJ>M(aSQUT0t;DEdAFQ9lsQ_l}{w@|`-3P82w z0!Q+BqWS*&yCQs=Ky?7-rF=>VBpev$UBI`Bu~ye4HiE66GJ;Ksv6IIwKs8HxR{H|c z>RB20k>m+oTIGG!V%@!qzV_yBI?B}4Z_9SSv^IU5eh@|K;z}{xb!AXTL|n33J=-U! 
z=Yhty?YuM*{&;gm7(c=N>TNw==l3li)GnXLE(t+~(&72tu7s&dl5M)oD=sYEZUX zlwR{W-_&P{+|GHd=!IgLDMGQ`OofthIo66hHG$!gDtNX(^VolXJApQUL#|ss`;J$ZfS~0Wi<^n^2N*ubPxEV_m6igvsXB?()~q_*TdUB!h z)`M_^f(uuL;njDG0 zTL;Gp)i~2VqjTW?6*YMETj}VdUb~oUWBsk>LXnx&9w0ZslOH$|Yb7mSTFWK`Jrl<5 z*`h`~U7^9}R-k7swdDTM**mqqn7CZ@>-giZq#Z+NuECGo6wyndG@M{Do;o|cp#XCS zjeHDB1*pH_NDqk8;OYVQvO|Ms5u|nb=j^ceds0ewV+^1AzGGG#&=UfgGn71$6P@?z zTw=r7G?6Uy@1Db%o|~9dV`#<-AU(VFjK%ypbK6mFJCqN%&joEF)bbzpvkiZU46z4# z7YCqsDJC3d(fgg3pO!D0k9_!k)(MeXs8OX?(~>@-mYLc0+PM!x{Gou{#dZ!guVJMw zTB6_{RbQs5thPlP-SD`fJ#q{Y4#|R${*!1%t*#8>N9Dj*$FK^IAJr6$n^}3QpFj8g zB1=4A7+r^`#UKOf68>@F89bZ%-W{K>OyogR*dT|Zd@2<}JVV2=Q{6vsPY(pb3yBmE zaYfkul@fdJZ%grf(u>#P(#rdAr0RY!1+Wv7%aUodv*|(XFW>iBwFDZMY)pEEfh>Gpl=qm-RS?FHs4QTa;elQy~of&Ib zYp`quXaint`vt!AkJ=-Q2L$7?J@;9TE)UmJg)(Wh!LH%04{#2@4EokiT&nZo`50x4 zz*-()9`+22q+xTe?-~9w4L~CNgxf_2J@>s5Laqt@nB=g@2>`eMdU8&1CB5jAA=^2+ zf1C~=6h$Hc&d+o!_6w*85va(iO?pqrsS0un#W5bxq!j_+XeSR`4QjOmjER5yX{**{S{* z6X@ZxI?$tB=jI>yxIyq)M$#XF=MI8F=!m4LDs$&$h?2=M_T$C+{5m%d2mXz zdF)5xEp_@$Z*9*&Nw-XP0f(4rkMr^j>3ZwIOIT0{R-5~Z@!Ve_Sf0Yq*CaMBM~(mC zdO!opbZXk9pO=r1m%s_ifX^1;Zp-J21ep*2i5Z558eqa|W{o_9cL;Pawzh5L1g{zK z0l{#n>RZ%_%>n{G0O9w(m#y=5@->kG`zL%(!M&q78t`!n^U)R#YI-U4{Jr+Hsk>i+ zR;WDo-h+%ZT)IT_C1=as`~8)M%FJN0xMc85|6|kvz2sXB)~A%a>ltZ_NpI_4`3*Q{ zkh#C0LlS#)>I;y_dv`O&rPvD4O%{%28j4vItTX_ATCP+MJxmVt@MmJJGVf8QaWJ3v z-oIq@%IhgZXtu?6SaR{0Ahrue2Gd7m?SA$aNm=^uq-`r7>jSjZ)%Y>6j!6Erj_kp; z9e|WU7$VdxyrGGCR72|ss>@Vj1p|lmmgIG#4Mt*cWhVqO??;4gxPWB~t)u&x{N+1! 
z>19N*TydXXpWjvV5XZ8;+`7Id$JiFWZjn>qTT3uohkjx63=l;}pT`iZ)Vkj9w|}*# zR?XBTHqfnN;4xWZ_O5(fiD~98Gs2n&0bNpu#=px;lW~j!?@fqrT&`Jqo_hUp z18nP?QY!CL(<%Ak<$x`oLp4dvWaFiIu~qcU{Q`V4jb>5~v*neQxL*J4GoKvSGL+h+ zf*}DF5PXu$YG5WlZ@F zug}UJ4pj?8Y=S{YZlVr!K?i-AZL=PtdM%s5;XBSKs?fXfxRN>-7YVN#-!gat`Z7~N z3DszCd(a34nUNAN!@2A|ER)Y5(rJgLtF=hBZB)$I9F0xR36AbrEV+$y*nb8wthXruCIfHnpzdj@v0zzz$u8 z3h2)H$xye9<4ZF>ol~Av8>W%+_J606Lzwz;?68%*Au3Zi4MYD8lvt}!&W@`gj^aJDR&X&6wiW%+ZPE#2A(m!k}U}e2lWt6J@fSq zARRJ**q}7vKr5>VNBtK#N2&Mp9vd8&M zsf zm1_!Api%}Y19_(`gEBhpRF-h=M`NHtX7j#zSMnH-Cc&f6KYjQ-yZ`;c6lUqfSbT^P z#z8uGVHz+vr=j#4Xi1$ZjtTp#GN&G4S|$d3@=C@6jtE;m8@P+D+!JT3jvzld_h3WU1FLirOtI@Dwtz%=mlHS7zm#Kxyj zi-j$XcOouW%nYyGecr{q^t$6wX;(I!`?4C+Il5-#9wBjQd^etv^K_vZz>DMdA^mz6 zp>2CGFV`cZkLj@wgTH?6huV_;mOU+*l3e90Y>Q4+!%x{tY5`X6$20(2m2=d>gD)=m z76E*3x~O@^#Dcu6b~4A?EpeSkl#~1KGz#%$lp!;d*ya#L=PS%n=2{{WH7ZdT839Ci ze?H3mK=c3{G|&ZZfD_FZdiVZI&?-52i)E`G=eB5c4-hB-dD$bdToT$${vS%rT;sf5RL9eXClbL0)#zi~RM* zpHdE7BFWT#>OaO@b_4o}N_)ePl(da%G`*1c&Jm@IsrN0=>(_CLY-MU4UkHx@LZMt) zU5l(hBn32a{s!2x(oiJmZkwcZt6t{aMW46@PXF%cz6%^}?-jtrS=O3_LTLJA;(QNG zoP%)k8=@bAlKj#42GV<796TJoq=W!c&ISSE9WkuEf9hJNgfnCmSDzd@X$W3s>%Plu z0BtuQcn54ZfV2m2^v4Gvo)9L4d&#gMlLL<|)vg%_wfo5tZ^E3KLB9NHtAVmq+9( zkaSf$O6xK&WhJ9CIWtez`bPtE!1FetX1+8d_t-fYqL6-36lO!oYYAWoZa zf3wS}^ZeC$!gR||!Fl)HQ^w8*Mn?1(>VB)3`MKIb3m7vb5T$6zXujm>d@dpTJZQv8 z@qbHzHs;4wC=C)WFFc5hUhRne0 zj1mUv?zUiZ`p+(~UAH)J-nIievxE?94g4g;ooF)}!tAM30kAk=1wikKCBg7e0Q%H= zg{={?j2b6#3m;c>{t%F$eN@z~*qQCvC6vyj50|t7Xc1DkMf|5z_DaOzF#uDepY( zZ#AumcrOAtd(*(?=HG<$d+GjI_RXi2De{9Ot;x4?_yxUnLbtAS1%p{Lpj`EenAaR1 zH(nzLQABRP@r->VcTd0hV4=ZIYzI;nJY7Ac1qA+~8UhEpixWz~4&>+90q3@rD*_Y7 z2Uu<}9{~qPK3CA|1;+5de%HzY=4lK1y+CF!pooGat>hJzwZ-|BZ|E!VgRUpP!~fvO zi2rn-K%qerDb$q)*<6ZV02ukNzn%d}7Y6#W+gPjC0iXbd{3a-MWdI`}iF5TT*~gR7FIn3+T~o^LvI4DO*IG7^jn{~_834`Fa9POzh{fZ0Xm z{8!SaZgwYd$a7klnjn>aKJ^p%co^_qx+*2_9*-XH1=LOK?<$Y8g0z%A73?60w(%a(!mmrO)D&rGzS+ZppqH>Oj|VuzpJf-3pQ4_cVGq3fG4ji_>0Q%w z#)@pDZ$}HO@|Lu18@=l7b-n#Zb>|>xJMgTLPp2Fo*8ymIDgi 
zYys9yJT5k-gTx`NF!6LixI8O z^+p%yjUe*lk_x`apvuKd5x3su_4EEV_i3dU4+U)kED$fq6GYdHcJp_v!H3YjD?f1l ze)g_Dj*uaJFpG_6CTLvp?igXC?{sO4cAD_Lz9S*cdpivBL4u%MKFU;iq zpB?oC3i^jQ6*rRwHF?=8{@U&E5>CFQF|7){5GL0% zO+KGgd+FgmFSpyyWR+_JIr_D}Byoe(_1inC+@`a?bW*y%-g69LhEet?T3$mQ;+-t_?PdJTU zv-uB0o`%d5sqBnp3qLaEF)4?A21iyh_6p9AMdRAhbGMz}ZoyuCi%{O4U%A3FK?|Ui zPCA0I3&LD4_326_Du2xNJumsxsH@igGH7wy^}%CB74^Ql8R7E>0Uc65xQ<~4RjF!^ ziyBIuc%K*=nik>`TJU_zqNW^#yC?UX*I+ zDLDv5sqT%z=LhGhOlqsL8%HUNczF5aj11Hg;L0W^;UH1zb8Z)j+n z{Lj9tkt0bkTVXSbt29d{Qb+`SeJwc4_1gpjqUh(JRH-FvLHLBsyrmsj;n*nl- zE=6`ro1a&C=j&&cv*zdY;#QcP^omQJ*lH};hV_dC4hkfUQ^-}--uFFm8l(K3G2S97 zz>o}sWW+!V;|2Z_eHU*$rkn0v7-ssOdzVJTRby;H&iK*f4S-vlwk8=9G(WjZ)nPVY zm%^}jpfZ2NRe`M+hOh&03WT$u6ei~p?Q(L*{Cm|*1pk2G>3GOCK5M&sdIsS6@3D|^ z0#64j4Zp3!pqCFacfBj=^pNZyLeFII4ph9wuj{xyJ+yl9(qX#{wNT=ecv?j=aau-^ zNI{Z@4UuIEz9oZVAa6Z+33p@D#)pCnP=o;m#W}7BPnmA`%VpvN!NzY9!~@#3xT|Wf zO;@$lEZs`7TgHIxyx4M$%uI(Zre*)O)rX7@r*orNcB=usK{scgSTWiSkyc&Jl&w#a z8_u5eRhP`1{z2ec2VJKe_F%$iNl&uXt7hByCElEiBRl=xfLTbVPJuyl1cax7HaQA_ zDLia}6u}e9W`gwS;3LB@_^sn?FM}i`Bb>N9tl$eH$CT?^1}5)e=D)dwP$Z@+xsvY^ z9}U)}i7f@OQlh79Zx>e(TmsDTbK;ilI#C|Ln7(Hr4u$FFce@1E6q6rGQ4?tkahxEI#z$FCZQypWgd@oCik1t3>NZ1k;KkGN2}Z9KXRm9N|b{ z`4)iLfxNx{*>ooOwe%rHM%VmFN*qMOyfjw{CQX~c{{_om(M`}Z55E}V$3tZuL%;u+ zVf2jtnF0QiU>DU`$1bU~*Bwri=OQ&yM|6ur_3=v{kYy`> z_9w)DdsPyqe|&DanMU-->~jhliT#$f9xean*QDPU+!8|%@`@;1QFSd1r4ixOYv8&drUjgRfWpB!p998kG zw?#`)@Xqtz2k;L`!LQZ$^`#;Q8Y>k#sFHb5S}&}#Z#~E$dt&Xku*27z&*?|{Np;yRDBz&gNK`}n#(d(|;8g=u4^6kgQ zw9NO5EcA>1aOMY<=wF<4NS@WWqMjChxQ@75fkl3i;@4LncXb;@b9nW*ughDZC+j+Z zxDcP1uycJ-{NB`SH03%n)HVy*5>D`122%BueHJu{i2Qtf3&T0P?D9}GF$|pHf3^W> zXefhpC>Ml)wyKs6RF3`=5?-03S&zv5k3U0n&pi&`j9rgIg;=S~hmr3PSsZxqj8auP zH#T+BZ(iNMSB*{mCW)*LJ;=YBZ`u%QPm_4Ac#R z^x0p%C@jv=PJ`#gTw$i+3^J>2Dd!7Inh3*ITIm=232ZPXe0SP^dc%+KQ*Q@c(N^xT&fQ@WdV-Uf_)? zBntTu|7oy?#5<=7R}~ag`801SfW{k!W6%suZ^NNYyLZoPMD%M`GP|MJMS7HAN!J+Q}FO^_1>#;`bTSwJ3Y@x*ssNE z=I{I<9@@oG$A{0XP6H;i;7Zt4bCVpU{4V6(yosw|?NB&1ZF3U>mYIoleA_#1;`*>&tfD{3b1T}u! 
z#nwv(BMge&> zTmVRTfB^BY&Lq^=23KlG^L2E|BLPIU;f31a6LptD%x7Z%aftcs+!!ztfSx@7#zkA^ zpS@O@HVKZJgw{I?;ua*ZkU+mzj@I80H)-j4CN`5f3Z8OKMA-?mzkH$9bX7IzllfPr z_Xb?qwOftLEW78~-N5WNl4H^fb8mBUobIv6Ua?|M=Y#ARA~o`PpL2zF_it zH6HPh46RLaHvFyxy~JUq=eLAe`OC4ROKXxc4e!>defVv&i4rp83~8K`knY^R%%n}y zDqp$lsgwr{EYqG<=|e^k3QFiAIrYkIdN0nII&Gozsr2I&{a%zUA&8^3$`exoU$4%sUT>4Y zKlaD_mNo}_^KtYJ*u*Jw?Qh{2%O|_BH;*IjP>uT3lQ5W0f=0Qt#0iTNmCi`4NGkj zTOwDR8&C8oZhKG<65}fhp;GQAh=ZCj%Va(sCoFL#JSdG=3V!bn*oZRQMi5bl?(2Nxi?3> zyq;$Cyj0}Uw5UT(>N*^BQ*X-pG<}t+Y(@{vu4y*1EApo_^{n-eRGi1B;$~2Jxtss< zqMyZxCyPQMQ)l#=?ABsD`MsRGLv4FGc#o*Bcaq{CtaP@Wk(fC?29(F!xi>6Yu=e<3 zAjusED9fpW0UH+&Xd2}O{E(iGXcsF0{jvfEWRM+bi*^N}1{lG>-+rWyiD1>QQh*yG zG(ip$*>s_Krp%r_CVtWzLhbwlTCwSjdoJE|CvZ@EWp5_64I}DW%J@m|b4HaxYaC36 z@4u`h@3*t5y|BKNPA_EOgG-mB5v2dMNbc)qSN6CXC$m1Ax`~*B$u*Bs$+y~9o?cy? zLRh6KpsDZgc@E_+2_#EOlu&Eia^Q2B3dP4W%JAc}gd<wV&oN@`#$g{_u-1PQj0&X% zfpYz^O3Guhw<$NM3by1Sf58D+=MFNfPI-olmj0N6O^uMPa2@zGZD4{ufQ>vj0FcFEn3 z*s{6G$M9VWB13GtNAh3!SJ$mvG7{|am>a67$SGSJO3K|94Wc4k{7`5}T5feiSJO%# zgnW2_%-a*qVWNyn7<{ ze~$zl+s-n{mA>(lnFvRSlb77UIiJ>DqA4TMX?{4^US+~x#j=*R^kpw!%HO;rtkIr8 zX39DmkqVTWigfMalG{Jq`dQvqbJlR{>KIhMU;@%#W%dgrRG&8x*##B?^%282>C(7n zvx}~$vZPgVei)u0J=D(_XZ;jeGZl5EA_nC9VnO1nLzq`z`6HEA&;QG>=h12O<6k7jk_(?4n0#{5yBNFS<9f$V8e{b z_wggolAbw6Ll0s%>pHY=Ks?1Ct`blNz^UjFJ1@}W_6PbpuqlD6VN5XqA^}bU==KHH z79^X-?Br@SM^Q_qGYyygU$M^_M<}wX+%CG|(;j0Lf&f6@eo;H~Pl5 zT{wCA7q#_0O@V+KL9_qBlj~7t1JD=b+@oKXVP)6|!?$`?o|l&pv_qzPuZ6_zZ1a_K z--p|6Zv_uiOzX%p9zF{@9)YI8b9WQh!J-0`7Ay(zQ@%qjMML1q_5k=E1Tfly|FQxc zFfT9`K2c740KwD0?-=n6%kZYnEMn-A!5*D2O0r)MJ*S_2wyu|b${(^QFmP~fX=CYE zS595BMW(&TFqP(W);q#3#DmH44~uTMEk_l%yO5s6<|>b4K$UlSW>#RI zOHQWL-BfH&-m}$NEgHI+9IIv^KTG&HTt(yZBkGY2>h#4YMiU+D_la?E2RIKK@CgCYIX*3CCMBfCg&h{G0JuVdrB|NYyUBMQz$6P_Se0!$5yZ(e#9b1qGZ? 
zCt|0++u*?Y!Hx)wuT-uT(TcmzUdaJLh>I9$0EqD7-9?q-fjh@PJ!N90BIM}9sc?4h zi(Y5)HAa0|A|TOaZqn?Ik zl&T7SXNJi!KMt8mZY#eJXg+dy%(b-JR{MQDipa`f zvY!E5EsIv;l5*z-0V7Kho^`b~TLtG;TfsB;gsb4IgKqHI=dTLMN*=c^fG&dzuG^2Z z;-R5>pajC;=WmV0A7iy3+tB~SYC(_f%S@*f5RhhF;4U`q0?Icrh>3sIFznrI#dU}% z4h%G0URO?VT4trOV021ij0wG&-I5!$i4OX9gIY+atQVk?KqB+uA{GbJU1E!{PCxB!!-qH|do6T}2L$uY!xd!6on((!3p-3u*y*bWi;Xy=)PD zwl=m%0VNC@2?$jG<)eZ6HvfKP{_&7-L8H6?BNOHU16)5r0b6%_6h_4fX4C&A*{@}) zxaX|U&qP-6sh^qpi!Ffvg{ydos!{_c&a^b}@BpxUFM^Rh6)eobvI^TZP$0)$=Z?g|?3BRO%>H2%Fkogl6V}g)=K{zH^Be&)18?m#NrAzNYnW z)@zP}^iq4KewrJdH@w3_4GqPJ_fe6RbYwasN&5}x8mEMJ8$_?t_hyYA@WN^}7O}~f z{ELG$wal^@k(_%NMAvJkzY$$ySTaDUh`8gL_UXplar_-tSss$l`^313aYmrNir_~| z@{~2_?&al;5WxJ(4{|s#FHL?xyaU(Slivjfa)*CcnE?D*r~7bF-oQ=mY6}A(J|7rn z8m9T*cj^ZAkzNM`CJ?t~l~EG(hVpW64RuCvmZVrrEhaBzAGCmESR%7LpWncn6eT}` zH8@(m_r9EeF3n=>0^a$28Cx6Nslc7CYRo&8?Ik4}TJ3RyxnsN6ltCrJ)Xx`W!ac}T zR59zT^UhwrEeBH>&H5-v-&ohcc=3|WdcITR*nWYO=NLB^>TO9ypUpM97&+jFE9-{| zh513uCaRBwTV^>AMFmNcKWe5LsyoEl3xo7iy^>*UJkm3a^hf+8gBN7^j_?x z?`CX#>_|;;j6e_fQ$J`MM|(KhLqJLY<}J((w;$@A-F#K%tDg0h=bk^Q8I|X~?m`MP zHOLV6xTv-5{uTsA#^)^)b=Z6fdPNRYdKn!_G)vKGNUQ>ICE2?`YW-`)CZcDib5x{w zf@|UWo7XWLN#-|L(``DZ;`L>QX=Z4K#~YHKY;1vc^d)wRk*xlW`oUc>CbwR>G>JHg zC&~3QW_&Lz>Ag4%&~!YFCFSM>VsRf5N_Z%n`NmtG7|8kKQhodpa^^0dX$&}XvGlIlA1)-&`cqaq zW&HsLnhPHQYXe&Z&{zX|lBbs^TmS|xCXiJL#s_I0fL#u#A)vb*W)AI}Qv&jefJzfXcDx6%iT{J zAuIDMS}s?gXfw)T@iR)grIAC!W>+xsR@>x*D?TZS&o^_vdVDu>2eqz_m1N`K3f8(J zRB=Iyze_bjX7qW(cmvJ5JYA*2&#LEh^fGJwrd&RBCXH#^gwduj9u{V=kE}fN%b_%G zgVFwk>EL)Zo(5|~D%H8W^ zhz4zM;TUvE(owQ8B*MOO4<3U0E7OT?p;X6_XtKsaE(6K}uyVOX_5@01j(g)Qj5WAh zpQ}b^uc8s;_Bisj$265rR24Wtbc!Gq|BL&n)4lc_Q?>?lCfQ1Lea4&+1dR{$ON#AR$~S4>@0&URA-}TK1EB1P^TQ(a z03RR;+AQaRtl)>?hlLujIiJ5WIxaBGtwO@|)@+c`@oe$94C>Z)UoQ1A@s6wp1%X5i zr&__0aI~M7BN~Lo{|EZT=Z)|HPLh*gFd$(7T{MHTj@nxM(AJ#1Lnox|^()k28t>_~=Dh0Z#lI(#bq}BlWE=H$ z&&gHXg|9~y)>;$YAEdfVq_a~>`HL=HO}vPK+mdvCsL36?#IqDtl>6$|KK;r?*K^QS z5+~Oz+0ee4KG2gm;OX<)q?&!5;8T;Pz&@{AhB6$6w=ORhW#)*&_(3Wd`qkEh1D2nW 
zn$!+!gAHo1eA2iNt1IW?OXWp_8$)S>JhvAvwK?96q&icbNFJ^rRteC_|N1ACC;>gj|Dk20p|=Irwm`Ak=o{|mL2%NBtvtk(_=s6S=cieptd)DiDdYbECGu4QaV@S6d8`{B9?bs>)s+0~BS ze`)jqoSnI$w*(|1II0s`yNhlW&~}^=jlB76*pJCjQkF@buswTwkkH$`mrSqHx-1LX z?YvOg)uQOvW%yn`x^%Syk#r5jF27+NseUFavpH+ETT~|+*%U#8ke>Z=!{^pj5~bRS zn1a%q0#3Hyhs4O^%LCp}1|r8>0_;KFXfElHMy|lCvi^R?U-9=6I@h(JjB^Qw4P+k6 z)k*Fyf^SQ~p!??&fW@xvn&*0FnTg=o03T?jX6)jjuFW6YSy~8SQPTzrS#1w@ki4wz zCw%e+h5lt<08FN{+ss#}W`KdAob4_E5JAX<$Q4ygzWF~AaifBEi1gk-k7S2itE(5uy0y$@6Obue7!;*k;4@Yqa0DAR7ykB8>(wE@!HS zRp$rD0GaGtj7&Dd%aoqAwxK4pqTx$9qglz?W0!my4TG&k$MuOX8pH7BgT)Z399|eB zW^*Zfbx5_0PZ8Jsql!G!34)NT~Vt@|v zRD9JREgwL(?;wG)bA_77022U=eNcNE1v}vqK^yW-Xsm&1lZ=Ll3lmN&Q)BK8pT4otN7$d?j1 z0LN913P1>7)#+jxG{LS94w`(YFn!};-Mz2h-*pBaA6@5mKPPorr}9dkk{-1ZO`~() z_N5P8%okD4Tt?N&bt&9xM9Dhy_xnpN3clPw@E?>*6S2Dr+WO{w5#G6ue$LbqIG5rl zqQuboCPM7gRpA|VKX%GId0-CO>1VQ@kG_Paj3b_Gk4NV^+d;Is?5bUpDt3*Cmj=l< z`bQz)Q9X+1aG})y_;?uHr1x;$f1kD63+V<05d}Yg%U=HYs0N2P0zh;A{Vah#&r?r0 zCvN~Ka!12Kz>>f%cRM}MA^hJH4(WO(edWrG(OFxCMG%~t01eT&{D;V);M8;sodQ6x zFfN)dQQ)E(gIqN9PIULSTkO~LJe$?G@Bh3?V{_3AsI^7QjM1+zR<`3waEdqiEvnf* zdsVumTg&+XVD@0{H`2{rBH>vsKkSy)ZT&jEpCFLqwUxbXYxFR0K z3V0HjIfeSSCu|t4UaKhj_Z^5oyp(4)9Zge99&U$4rR>bOI?M#?q!;?p)V`owj_$3D z$+d9ric7qhorq*+OSkE1WI&SwIA@_x&49Sli69CFa8vE~E>#A_n0Vs5Rei{k&a>L2Bo{Z8DMCb zp-U`Ux=U0*kd_81B@~eo5$TXtLIng-kayjKd+UDw_qrtg9CYA|t9M*CxlQPb~-pjCESjm;Wyc$h|rC!Bu)aL7wb-_Ya zTV-%5hE3JNf}iE0s+!vNk<_Tb-e_(%>D=J$FDhvLYK2<$z@ZY^Q|)h?1vB+q+>9OZ zh70B1PgG5*eq!Oh7_yB;>@Q|NlhG&`ec{Qal{Q*+x}dnn#JyIxWh+>A+jNX*7Pq7F z4jpcr1aDt{HG?{g->xUu09}XJIo@oE0HUiq8U+C9A z{O66uk^omp zOHDdJb3tI*gLH`9%?Q3_Qd)~&jU=Cd_<(RuADOB}8T_(RMB(%Iv=38*PXkNXD(+Xj z2iPiX;6ffXQU)C~++$rH;oawPcUexRcNE@DD!|RBd9ZR`4*!HaYZKv)QzLhvrLjrp z7)dJj%k6cGgE^WJfh(5~d{QoT-GhU;EUT_~AP?9&6VG<`_Td5ucLaIBKe4`CTeVC) zyl@X3G=9{vR$zreQql2~6AzR%$c_P5A-FBl4z#`0KCB(GhI}((jNEEdojsgh4WMM;{EQ8G=s%MHY}d*qT0L zx%kr~cBAB~dc?_-%ywFWnVj2vEmr)Q2AN3vW!62(rvsMonF~NA>qc&()@%9}>)n@{ z-RH9x&~DVvXvhv^_N0S=fAUGSX&E5y0A1QqpJF`+UM;CY3_cFc4(yvAAnS@!5 
zf%0!7y2-{q(+zDj3~Z@Cwu?&A6wRfNX>U!iQI!qVpD5Iovf6y)AJ`8l_$fe)oxy6DP8vqRe>{?i9_umvuge-|G7xF-0>$sW^e zu}7io0WQG(_4JcDw}o0PD>DuDTz=ZoSMyh6c=o!K?Y&&opy!1h zc6B~rj{6>dGMW5ZI}&BYKBm5q?e?gMQn&YH4wBi^vxCH*s; zatsi*eGCGTf4rC`p)f9HK9TuCppeQu(AOz5m!*K_Kuup4#j-T@%vt-#9 zmaLc%4xNg>15OjDayhn%ttSnlAb>;68FaVc(l%b;nfCJ61)CGnQbk8s8PXv_qom&v z5i#pbZ zF{Pz@txYI*3I*SV=0Xntfpj}A>GRmj>26b#yM~g_$eMWfh(DYxG@&XSL9VB}*H%67 zL&|rAmSzQOFPexXut}(oH0y~*=E*Cax(O-6r)%%a-l#rz`@_EVmWbH3P%Ck22^r@t z-WoC%P0@q7mBUtP)2MB`zM1(OO5Z1d0cr2#X>euQF(~;^z4i!bJddR8liXoj3 z6rBDdJ7wcQK;O#*rXc!}J^zf)se!Nlf= znk0$sW!ye%AvIB~vq4Q4p1J~3g`{AvNc2zZVlu89S^{~I_qsDUOAklN$b*z3K zi4?zskKpbJBbxhp?hHB(w6`#!?#CXLAd3ZzL11onK5+0y7%)LYR1^^L2qYGmWcS}& z=Ou=HrUjr2XQ7OEMVw4o>?B~g;vdiq#H@B>V|&WiKkaP+-=tfkF30H^eV@x{Hy(ZNYC7M;cqI3$<~55Do)jK+!@@>g z9SK&L+@{=#agY$SixmBeXL>j24niwdKmXL+&0GqySnIuaPG%{ zqwSi1enaD@<@J8IndOV|9blOt$L6u6E;#GJvxI^|#vK8`Pg-3EQ|cb|;~|Te+MBmN z>3;#DN0t*+-e!&;a)z^3E^E=4WpTjE^Y-_?UpySHjKQv4eoui`(Ntq}|h#egW6d zhl=bi7^fE#D_W0Us5+QNvZIf0^SAT+yru! 
zwI&6+{HYOjr0H@1tju7AEGM<*io-*-FI@raJvg4}w7LL;dl&RBo=;Fbq9(lqtZ#+| zueZeIg&S8gP8)`aV3pBvfbkEvFC|S!-?zj11CW#|5YZ!SLK^`%VgH|`RB$Zd8vg^J_}V4CcP0#AOV0(WM-e7 z={pE0N7P3d7oS&+_!@Q5sxU2`1~aEnE4gDVTaG>ANsdxG2iyq{U$`T+M&!F zKaK5!fp5Rz#4HzM-xB2dy#hjxFD&I%yCE~xMZ+Irm-Yyg z(E2Rc*fS+uv@J=NWxEeNI6(bUv~$w0gNJH}#HUick7sE34nwC!)RTY;j?2Khg}5 zy%pnh4pPZV^+Gvz&t!jgqC)l10v-80j?g~FTxm5m+c{Xl#Jo`Kh{O=pRN{0*`IsP5 zzaDA2>sr04HKMaCS2nC-8hS@X@6J6Ej5AeT4N;}$2>9=CFI@+)$Ai-1KT_8i$y!~SPgh+Uu^y&MW{PRJ2*O4GWf1{P!O7I8 z0yBr{yCrdq)Yc9%1E6y@Z!3a2vFD#Qku#qa{^-ZodAp#d24~!+dOF#GdRm3?fSMog zDx+!J$s#VkGzwiv9MHLGannf2FRYyn`Bohu{3sS{K?|)@Ekr=`L*2cJozg%W)+ol*YRkI8_lql@@wB{MPzd`U4@a#qvl27U)6IxLyEBYJSC7bc@|5f76cs* zK_U&;sGI_xuQW&9s&L;UrjE6leFg!R4L`p)|8#V7_cMLs;STQIY$@#f$OI>_aAaWN zj(w_B!F}%t;G_echyWnw@-ONFj9@XYUbjTC))S@Bj!)B-NUgYo5gB&P-u;(-4m|px zWzFlqd;tVi)YzSymFSz$%JK_&R3QpN#%^BF#@f|}h9z^8}|inEAMES$|Qv+;lg)WCA*NCaF# z|2$MUAV>7GVASNVP5;4$Ghc8u~#D6V*@tv6K(%F0W2?Ns|GL$lJ zwWz!uJH`5HDbh(fsQ zYa7t%0nQHKAd^#+w}hzgo(@N}{NFoK-)LkKi$!n))(@w<4Rym1?t&ALzGy=EAqdDr z00G&(zO?mWwOwZW-GCaeVhz(NE6FGNX|f`>hj8S#tM9{_n_D*e8R!!6@i|AkSvA$V zZ^tOH%#%MZK6in7@M%=pT&+`Mm7~1w?f|eO@+X~fRNwOfW+3%}2HPf=j)#;ceSM=V zmGZ{=*0Va>xh2T5-Iz5s*1CXXG2QLT$xV+wuGlc13s(z3rk!t(Tg=cDU3Bmkt8PKX zBYuT>we661$qXjXoR?LO@>J^?e5|8`tOWbx&>t_+NXe596!c9jxZzAf>HTzogP> zhKOAS`5Y=g8uA?O0Y-}4t9Phs3MLXc0d=HWBFDdu4o@e7m&9O*9ZHz z{eKR)-v;`#Ha3@Lk^QgBOU{nIz_YR?!d#5GTM!^W$XMvjgoNWXO6q=Vc`typ_mTl` zVj)&(9zqgWe=n)^EGeuJTWIFhhbP@@LbRh17bncTI+KYnt|oZ#vH39&UhBetoUF#S zX`lxfa-xQJS+$uS3Kp1F$IkadQ0*ilXBGBJn0$Cj5tYq%n>cw-2~XV-?;>?LwS_^I z632I=on}3O1ZC2K6EEL_%DmhkSQP8e$O<>8FAq`&bUnbOrO6q7LPUXfzcj0e8@G>D z%F;0L*>B}O$bhCDU8jb_V<2V-H10UoVW#Y62YQB3x79#eSN6XTR9{2Vcv(w{5kn>9 zqVEJAAmv}SXe^CCg>NV+4eL8O_<0q%_V2(6P`1Olns&tMT2gx%L#YJBW^iAp= zgv8Yg6yoemIfJR{b?WG{Q>SgAs9uN@+Qz$x&9wHx>>715mwQ9igVF8xE3X9nujj7> zT)@#?e#P!{)$^txK=_sjd&RzY`gWKFcB2xWh%Hv3tl+!BaSS+Vw=VIAAESoGbQiS?m_4#p2JE18+>rV<8ooyi{EEh{u~|Jqxa z@am=dz;O2K{Zt(Q1AW?h|~t6XsCxwUar`nK>nyF+YzcfAq|%Psg*n 
zuT{kJ!xY||wr$(53;|Q?!Ie*7d*-^LSTvV=XJh&t@ymUrH!S^nr*(G;YhHnR!*tQ>S?4Y7xfrHv6#s>8_AtF zN$h4cS=&H$8qjlHz27wblZ#pf>3Vs@^;uui$%PY@7tP;yoUvn$2laQyPad%_@mq&* zz=Yo0QH~%exrKp|;d&lA9 zH|Bh0C(Sz`dP4+X5vrq=(?D&wzXipDXY{du7vb$YF_4x zL;QDL;!3jB?n;1b-I9`86a0S|+H&U04TC?pbPSiTRwnh=m@N_#a6hAK;(oXhHack4nuLv9S>OJFNqnjB?zeKS$Q%&FzbE4kUPN#pVF$=+)u5~rM+ zc+_UFHk-(3_lkT12O;(7VWOFVS#g<8fs@)8x&DZNt1lyn6|mQ_sDCIi0iXAKN4F$! zQcQgGm4)nto{7z4y>o&eogQnaH4d5QiJ1$Rm3rQ>Sl()hPI(@3au3^T$ocbmre&LHcl@Q6 z$<5P+q=fZ!W{OkC%L~U}vDt7K6RCK|bbqv1GG-K~V$<1w-`{M+Pl(U^;so3F*VO&S z=8fyEThcS?I}Ze0aGvHpifl&GuutuuS>dt;Z~i>}%>>E~f}ix@=AE~6$!(_8Gj{SX zWd|g8KKAm%%frJ9IJIDu0$R=<2ra;yKDzw;?o?cS=5w-X@gs77wTXbjC#!PJSw`Nm2PlO zT&I$poNa-yDZA5f(wV!4tZY1rZjV1#p;V1qug=W((YjaTV(7ay8_AC3HC!D#2@}rJ zgttRCgYQObGpaM53z}j0qM=`Wt+f6|t@XKTm>xdyP@41B>X*9wp=^9+44_P7F+1_A zP^e6Q9k1vEL9B34!q(v0<4+DB9zH$F#5y=_c;gNb=VPQg$F7Vo($OCPG`!)OXfH=s z#{i_d0Y;e%O8l!3_1I38DsWR#%=apnNu|6-I$&U_3(VJW=s$kOE5b^Ot&@hd`Cxo~ zb2mupn)%y>C-cF4#TPPyu~&?~=POS*h9oU+seI+ngQ`TG`zZ8#XQzl(M#s)*wz~?li3@=y-*jDSM^6vAd_)&0N2}(eY!I*IT_& z9a-f0aK?6}mAb*it>&Lm=NY37*iK(Ua*r(3cTLQNXK^n~+=~-Bmy|^6KVdo`ldaA= zgWX3!bzP#NTGGp4mgVQ+;Xwu(<*GG;8rF%C2MYsv501SPdU=4+5MbGP!9Cp&&cB2J z|K16s&z+p}^rAoag4f`z!cIIyx1WCCTlS08e(>mU=EqQ%ddN!l{ZI-lM>Q8}j;8rm$Tk=Y&QrbfU^eMvf#m zJ@U@1lNd^T@r)Ywqc|_qi9a2OL%_g#$SYf{=G{xY-Qb#(1oeV%jV^(E7KHXa$wcw5 z>jS^6qs`{GXQc;0U%iJ+V_#t~Rw#hQzK59h^_(=6B9s+eO+Ykoc;ze5Dg!gt73{2o zup3}KAZz=vMpvLY_C~r`J0cx`9S`N=h#~m?aS{B+>PljH6Td<+X*3cBKxs!%FkH7I zxDzJwGe|1P+$iGu4)sv)`E_NS-g{tYg>^V`XHNZUc!sMS5B*Gpn z=+O9i5+87;HGc5QbIKXtcq`#qE(!<6&AYGGlhYPcg(s8n7m0Kwi<5X6KfTpHS7msO z#7~Y|efG;*5P#6FMpt}E>v zK|>&fF=E`{F}>Zieh7yEfA#?IS0Na_4OZn6Se0YT3=^=EfJ-1s1ss9)a4kh(;Pe7` zTFjj94>e%lsD=!QC03;Vh0@Unk@KUeZvdHA9&%k%#6|$a$`P5ic@Ki75x0E(rFk>b zbfZ?GCH%b9YpqFXA;C?tGw_4h5wDu4)JhR7(^tKU^WC-<9>(i_Pf5R)#234zxTs9m zq{PSlx+$yr`;0H`WnM5#!uLNn-<&m0j&h}hwBdRZyPVxyd}R7RIpNG z@dIFcX$T-}ubAZ>%Zy!EzpxBt?Utf$0nZ@r@0;>4#>eT84Q|Tw=o=rvRK^=p1d>(9 z0zob2={Z(%w)sMl~x8O9PocY zn1dnEj?huw23k-AVYP_76O?4D? 
zR9)MSLrO83@kG&`kL9kb59i{LO=*b*OSb7cAS42m^6t|kJW~wXD~i5C^4qC!vR;SV zHdJcwJFp^8GwrX;=CD4qqTIMd_w+k3AGQZK1mWZn$AJ~X;s&4JTQS&qtpULr>BWum zu!np5dZE#-E^rrrHz2hAcL@KNIsj6>9lMDsYG|sg#JAZNC<{V%&_%#N7^gLJGT%5~>-f;$fuqRl(Sqc17(|dT(fvvL&cAgM z*X#|MhX$czmVu~qcB&bH%*cBGj>)!}6^y!M__b)+xY}Ts$A=!Atx8|WnBysS;VH#q z=ev*9c*eWw%~RU1exjUO0AL$)^0_k**ruZmw!tHIyX0seGqAT>M&Sm91%O3S2cKip zA2k87Pg+YBL+(SMZGf!=6WI$eHvp`KV1$5v;GZFb82}#sk0eG9e5tb1F}DLxC(w}x z=LUG^ZIIvx9ii^3n)?T~2gQpg5)HoA0|QL?L#U4(P1}KSSe<^@S!acbekFRN3na7S$VM8+>BB=1 zLOnT36^xJ1-%IM+jn!~fis);D1y&mImcU*_zrB)=D#LSND-wKowKw99x>DVCeaq@a!R5pGotr*qibeU_f^EpQnd2u-u5ioyNL^w#$(K zynPVH2D=|oTG+$v!vyy}dF1H{*kL^${oM_v8_r{KmQoKnuH zP4Kt?@%|sTroT#NA+(`+7uV45veRw(2dS8E zdJC}X$NoISTPF5e-np^8%~ZHmn=g!fZ7r9PW#YEFl2`}%<4)kKr<@LrZ%$LU^dbNB z@n?!>x4+$GhWzv<7FbWJX2XlIAq7tTNUAO$l69+M>_?4RovqrV)Wa7C*@t|8RCbe@ zkGR6`_r5PYyM=AL=>xy6E~R5c=fV?RExyVZAbWuz;9c8!6Z7v@*oL@JvhFy2DT^QR zx5GL5vjcZ7YVJ*du_&kylih7tSlfBWellD3&sgz-wd= zVY2>+OKgvJC9#GF-g0YH#1VRd*LEgLP8;Z+ zXTff}6^CU{D#7J1^&NCxu4927@00HdzF@unilMVbfZ4e#(jN%0T^xa%6cQQ#d$$;6 zI=KeT%>7)rq_6H=B?l4%`0JiOQ=qzfmi2pizo99>xImQ;OX_ol5Auqpl{>yqF1|cv z@P*J(;1G{4#z!Qu>!y^x1u;->SlQL{W5iNp3{s?;m$;B&B#e*QXBxd}==Y)_hqz*m z`6Ji$3aLbKt>8D^(_+geItIRM6LyxXjqPe*kpA0?-lA%q>9`MgMohnyeXe`)MH$ED zN=VEr0rBfykVAZ>=x!da*$FlQL|T1{e>7)T6`kVT7NAswQS(g zNtaphW=t-=2N4R{AVMJujFimj(UJ3aucg=J&YCzfo9C2r)qcz>*H0;Js0kew^kEAO zkIe1y7*HKf92Hl&AOobj}4Zw72%a^er%TGln^5`{IhX zJ_IdfigjiGr(VH9n}B-7_ohIWCIp7r4Uda@$sFoMq9M0bD|Rzp#7*|f5@V;>cVrRI zL!M+VF?S8YN^~75X(cLNCjWj#-%n$waPjbPy4tF7iOXW0FZ5WZohGtDj^rX~nc$L- zg4(O}mYOZUt0g-Yj*{$}Z$8qVbBIZ$iqU=}0DxceIQ$wbjRvpnP1R z!$1Cy{Uy2$>Rni#tHV~sW(<4)S+7_>F#NV&Z7qQ_ibG~Q&oYF9bSqQf*^2-8_ctLI zPk#(bXMa)QSMi=ih;(Nx5|dHT$PvQqlTlugi0eX>640TN_f{nZbftCy^ zuLk?Y&8RQ?4`c_`uKObKaBgUMB}Lr>aHI&v*B_vPKQvsk%kYEOB;Ozo3r@^ARKtYC3Q~D^W8>Z~$1g0q{ z&`~Xm?}MJJkmn`eR7eh)97$)0Nl1W+uSWS@WLpelpst>N(5dOf-Dl*NzMdiJO4!ND z&VbY;TWzsEcA4sOs&DVcr54n320Wc7lA42^T%OOq&$#pKzJajK&3-Y}aGE8Crwq4$ 
z3O>DWS;0zg^kPTI#M-17k`b1sXkAX@FdWXCbkP5(LJ{L6uWyMDASYT(51?_SnD0A& zF89suaEA2ppS`k5k&uotl~7kKeB^LHm3^X{(+` z1XdL)#E)$tBCKse&=tag2f&JT)TH%(S&9EpD*@h#mhg0?eQ2X1X;&mH{~wa`xpGX* z0oQ%I*4-_jE1}>GWv2-}f6u@Bv=!dkNNe2}thYp~5=n|{vtJke>F4jtGixy5QYaLw z5OkJT;H6AHm}IL>E2dE!8a9oYFt~D=s`CVr#lmxsHfm9y-mzPXg8scil$_?fjT)zT zG12(TWp(OJo*pafN+uH;P)^8VrlA;xJ6bIt(kkgam7NCm{^%UrAk5o1!w%lYUxBwL zp%GN}7Q%x-`0L?)=yqQ%ZPtpNkaUL?EQ<;FXlndZ75(?#>goXa`CbnH2+W}Y2^tP; z=^jW(w(F1fwsvvET&uvX3U*YhLKHgC9QpT-3l<-`&mOz@az|is4>yD*CZpy5O2q)G zU!%=Al?xaL?MX}*H2+7_qrBA@h>1_+VkdN`FgUp+XnK>uW7KRt?d(0?rw@V!=Gz9> zIzm3E>&;Pm+9K4M#cVkWQ%yc4PZk}N1k}XT6qlJ!Wh%@-abs#w+}Ol*>MqrL?tKV$ zRLGd_ato`y*qGmxIG?5k)mqC|f2~6CX?ne7d>|Cd2bN{TZ}NlgKo6p*5M3T`?B#%{WFRDF~B*2iF_3o`)BF@ zI3<*2VG5d>a=LJD8%qTg%2v+9+g=$c@;v}J1q?|wLEGi1Fa<*Mf6I5*1Eb4zb!Tst zHaqM}Cy*J>8Yu**oMkXv=?*C5s0rno;KrR6MqyHs*g)D(G!qqeDif~{-HU4uULxg^yv;BDOI@Y?CdNz9c|xCL{I=FuV5uKt|5pqwoNZkAwGJliJTu>(%7N&?Q%^M-Y}gx4H9F zWdBgQqWUI1)q!L?QRI!LlM3~k_o>rv9bboFfm#hqSLc{TML2*=DOW@Q2(=Di%6ZWP}J>GE-mo36_?HjQ(N4w&hwYNjZ;A?@8MGb!^7WdJGap(K&_jZcbd|S zDlN!y|Gd-+kIrXKdB3`)MVeC{&++OSS4f7Mt!SCEjBlL)5pp4Z^5yw&64Q*%YDXzT zB&BpV81LaukAUkgxHx>L>4HGMDah*5*)898= zT%&Q#mAIMSY8GXgP|vI;%tzL{VqmH;^T-#^r>2dW%H=}`mVh{PZvf}}=tlB~8XsJvROCH- zyRa|4{UZn0m#2I2Tc{`^MaHa*9Wo-qMms|Y_|0-mkelWzEvaHXy0x%6YN%;ANSf}4 z=U$&3_P!uaAZ+?MWhFO(U~#g_u)OYrWc`dh4ii1{B^JXh9YJqXp~2_NUwq&~F>_Yw z*4Q9gR{gsBP7?h_Jexs1YtreFO?l_E#Rch;)9_@=j z-7a$CA$QNF>fr}W9uYZjIQ!jD@_VA>_&ld}R&yj@oO(n3qd|+8SVisL9nzSF5E7%& z^7E|LhHV-?4M_p4;TPT-x*^UxWPjX2H$vXSbO@O7{vlwd`UZJDWq>!zn7<^)m) zFhx5BD9IBV8vnk0PJ~>{!fL~HFdXx?JjZkMUu!;~KK-r_51~0uGLaj)y2^>nIQJCg z-Du~e+Mj2_`NO|a?KeJai(0juoOIWs_VyTztP02zBhjdk&Z{?ask}B6I(4yRtF*Va z2yn`x($m?PoYm?N#MJ90hDa3ePqR(xM|f+Tqgpd{DlGLwsSWs>Z}gNGZOL5FA7N4H z?t7VW~qZNJYpAnEl_GPH{sk zmdtJPwZTqz8r34&Ujb;5S?^^M8c!PJB>;8J;nSIGG{$Pvd(QyFDYX>)?jy%_XVTrz zWQqaLZ9n1Ur(bY>+4cFRq{R2xj%-$g%HRQ{69Wez8eC?2LPpIkROO#4BV3l6yOd3A zHt7M4zgQj$rcJnKFK^MIyLGw5)C7x@NKB=Kt~vb37BC%C;%C#tGaW(=_s7}@liit4 
zt-k-`yAA4W56;W&FM8%WEv1gFDq-DfKdn};MGB7n(#-;z{tV6jBYbq~EJx2w9{}mP z;y%;3?|MH4IPQ#q+|u3)B0u1#pV8T0uiTFTtpvvh%bUW@vxgC;bNS~ zj>?4ehZYw-)`=o&W`uCfLsRoNh>zO!n^!~U>et^HE~TI^V zEe*ZSp1}0PCm;y70g8AO@ITojAnef}Zo2MhbsCyajqgvcC9I0Y=D!7sT`146zvBxU zpk&_w@L1p#s2aCp$ECkKO2&d{wzydgAG z5sa`GrDd^y(ypztFz=A9bC86N2#jh5d2AM2P9q$#v1%}@bScZs8EKTMg_}Z#3-Gd- zE0eZlt=XmFT>R9SESZkcJ8T5u_$i{R=YP%u$!-(<=puFMQXKDJ>KZs$z28|h|E<%} zo*te6pbhtT_2m`%??VU)HGluuA2p^8fOLTdvZIYZTt&y;8=Be0sX}^kbM%?*Y)vZ^ir*^>BBvKLdNg)r%WTE#=JSB|rkzQlzi6$o znQP?QGgxTt-SaJh8*0My{W1MD{I805^VWXkxJJ8g38*#o7io~+(VX~dS!CEg&6f() zH=qR}*1X&>>1QQyE4EIfWS%HmXjCbB`knLkP3l{i=rCN=MYrO^!x7-mz@*o}5fPQ( zNd#BI<8LIMz+?_&U3Q*8Pagox%YaP&>xlUWWHOKKgq{ViJKgAWiL_jlO-^}cIbAEF zs&5Tm@nN_)hagU~pKDR%IcYT)aoO_|Y6)z1qJj9hE44bhw7nG$DjQ2gLO z!;Wmw8>cLao8ozC!I=~4shM3u*7A6yRje}XIq5s>muq`(Fp+ny?rm!_lR>6$lu+qG zpG8SDTGjnem@%FKwla;tbcwdU{li_zRwh)@BV6zz5=iZ^X~D7_tEQI&A=!4&S)web zZYl2vMhT7(J^^Ff{Eaio{1PS6%#F`9TKSm9KGEov>dwQP#XnEnZx=pN({uJ^h}%PX zfG%fSclr5UE}7(EuSlLVO=?2kmh2yuoH%`N2Oi$FOCEe5UGS36XM2#8U;c%OYC0OQ zofWPOwf4orOMuu?naI^=>bM`0~Oo)&6 z(G0v~!JCYl6t7S8yVOM`Stb=NYcX~G#}%Y&6{@xBA1CS*dw%X?Bscn@E7_LG+DR`n z32tujxdw@b@b|WTN!;bWg}+sZ=l!KSichaF;kn(Nc!%(H!YrF%Dw(_@m#?jEpD#jA z#IrrAn}FoJ{G<7EWm&D(Rh>W%gD_X6H!B-Veumo~BZ z-X+u%+h8ex6NIzWiCl3&)3DsAMB=gfFFLSNw^URkBMAMzYC){FsNm`oM16NAdk_?Wx;VIbTZ0KHsP+Lw9R`vSF`LZqUY^xy@vvE_muK}GqqMsMJF3<0 z4mR%{1_4f+JkZ>f02dgl8>{!;)U=_jNZS}|{YKCVqNxJk*?%+dLg>rDfp#|ih=fr) zqg*{^p?sHVCaNjQVS}<%KP`YGgQQ5i$*4y`#v5BIrXCQ*PP%^R(jy-&DrO(XuZ7}e z3i_s{9|~PZ=$hQ>=Z(7N^qkp=Tkg@!H;E)}2&#_KF zg8%hWP8i5L{u(~UC$W8aV;0Pt0w6^IkzbTQSQ@C5J@$11QN7lHhX?>%_JAh|MtSZo zFefmghK@RE4TyyJT)!&6Y^Wyt&D~&04_(%C9|f4*adQUEZsnmB_)a`D=Iep)?fux> zdcVpA6#_C*pb($}g}`nl-}^I!X@jhUoFaIR9pCDbAJ^ovb#?=-mT+i#Jo~&#abt3j zanEw&VD_7`(#D*m@b}svG{cUB@e}*Nco4`cd=Belczj;=%0=gMMLF6B0ySl{S=#E1 z-h#$9CaMnOkAnNiZX6-F6s>!4AG`*n*wxLduEiI)c^Ls#goy@Wl z^14cLaPVJCB{@qupl`s)8^DzaI>-S}v!={OIh9L%{1na0)~T#W$+}Y-E)U&h&OVWe z8}u|NfZ1kn+s$?~Upc(ddTnZS!TeKyZ`Caa!qlzl!3vroJFzcQKi9R&nxyo+Po2b$ 
zWL}%Q&1i>gH@Ot;z`|E~-YKcbm!XtL$gt?V!mIVwVtLyW*f54^pB|C&?*yKd1r8lG ziBEX^2S4V*kQws^sV0s|c^}AVDhCU_bxXOoc^@BIcd}_H5#wN0*9UG+SSow;cI==Swd8x-9+0oxhsjsY1>CrXiJym6;A-l9+UMYO!j%XhJix>t9Gmb0 z#P#+_9h8mpkwC{4W`lBd#RR-y00&1w2wIqPLGS6D^DT2~r4agfT)h@Ud=V>ho~krG z?PU12?tzi86X)XSPF?kQ1{hO-u^`?c(RFQum|mdp2wrw~j^ge`pK(WP<^+aTG=?sj zrEb;vNiCP}u65+{d2e=CS5ff~lZtEeIlQ9DqWM}qJyBd)!2YUW5R3^;8na*qqme7;EA0Nt`+&o7BI2{7M=RhJ(Z4bo(VBTBE}}{ppQEL_10q%lqJ@s!NI~ zSh7pt`TdJB)6w7CO%@1_;5r)8mcZU%g9L6F%&hPiDFgF%ck@-btjp}?Czm&#WkhnF zrRh=fP&or!{cjiza$vS&TvvLtuArcjlt~VCpho5{3Rl*43i2r_R~LEhyR5mozkn`y zJTNh(N^n|tvznP)^!oz8wz>RHeC6l6f>UAVs9B~(3}dv+-d&3?O^J(Ue}R|8w;-4@ z=AK>Pd=H__oh&GK$zdg-HhabNro|_B7jeqyLAHj3++iJOZu}I@p4!Kg#_ibr$v8I) zK~hV~Sebj{`P&Y6uUKcdr(e00+p)rLR4Q9TMxjaly<;%Y;wsE80He%QK3vu4UZj?J z6%SRWkgny}4bTaCwCn->23Yy^-M}17^OxxiK!Gp@*opYc`4Fb*9V~O;$5WHBsmlrt^{gvj6t9Y9vq=rqx7P56;_wz=v-K`!9^ARcg=k1p zp2)tUL-&aGK2Ln!>_ymOHvxL*0SKJL{n|#-F@-7HKNkvtCgOhk9UQPYV3ctDKviEs z0jX=_2<05Wp_u`w+|?X0YO44)aGDZ;FvSHGL-BPs65mTM0P|UWaNM4@aq0pIW)S-Q)aWj__W+>2!|#u?{EAD zT_D^26)~_#)6%RxcTbo2V?%TiPUZ;*P_u91UhO(zfBNt-7&+^BYIUI=7{}=@GRg5cU(HEV%C~k3Q#uG~3h_=OVG;xNCEe(Y%BuDJPNB zmz!bov)xW}r&9-ul*+A+2#Pq}D@B8Zk{UW^{F4jU9Ae@hJwg<61Wcg(Yu!(ACKjC{ zfZP_6tN9Pn!hK9l-}rq*`?sA)xYH#rhMTJq_HIZ&OC$X0ds>94!E|f)8$I*>&MB)A z&E-$2HGio<{yMZrBfMP@Fu-7hti-x#M`xrz&;?_L_Nvf37zUIu6HGQ;{XHW$g9Mp0 ztObdC=yVx6$paGUNEB;GR2xBJ7yZn&rw3+ZAJ=U))v7aIo;t^4oH?JfGQubljlUEU zBGukMrs+8rXk+xEU&zS1v48ZYZDP|xZohvTe(MD@)f|2O8l9H~{>y+EFTrr?dAwU! 
z16P(*4*piAb^80SmsD?6m6}E(^G@qC)Y0Cl(lLKbn6lQT;o9frZfa?g(K$$502B?t z4V!yS?y1E42cPT%h2)JCzssQP=3(z`?+k0+?7w|8oXFw^mRi_sS{CC&SPx-)J0;E`mUP}$c9 z>8L3Cgvbv0uJ)H|1f4>+@KSo23qFNK3F8a2-p;+GTKckbf zq2Qz+p829+qbRNF?P3H@aic=fu(GK*)151IO_q4+ozMC5v6vuuPo3XH$L8x0nJY~l z6oYZVS=}!rQqWByrFJz7=PXb%4%4#qd?x;R=mt9Zrc|Bl)b2MU;=!Uoi0We(#n!_S zv{h_@vewoEg$6As43zoLyRs7d0!`3;v&5_a-j!w79(sbivdylY>CZLLc-m_$PH(5A zK)&s4w!`5$eXmYew3C07jGNoo##vOC}hd@`wti z)AV(_w?S?LUQ&i%Pt|I@f9o!O_Y*dIvhTVxqBX4}b&OLRC2dU~UwhH+7$ptMPS`wd z(b}(^K2*Ny*dMEWBAyRu$ee+ZX|FBh4Nv6NjJ#!E6*mudo|y7z$CD;FdSEoB3k1wz z;hzr=cNE*J-#l=eF^Px-ZD`C+ee7@pQYhP?tUVBLb!lB)7$igZzf9&)4Hc44B3LCp zaW7)Cja24=r%(%cFwS!y2@xQG=PwV&!ODe#QMnqHQ#=8gLgpFhkc#o=Cuz@H+}r)J zN!(uAA$|lah^({1N$jgx&`CB+&J|b6vCaEdPT~!F;3wJ>asrym%wgJkXV+yphMdrcE+D1n4*QEIK_;8P!d>o z9;-1tYXYPvf+?=$KUG{CC4AzBcbXkprWC0u4R2STIQTj=g-2oul4Utzd5jU zHSDlE>kMT8Iz!>N%~B3l_ktQm>pDX%cui5Bm%Qp?#;^;pPJl2KbDF!%eh&YpCa#mV z@rKKR7f2d>e%I80z1$!%igX(oXzBt$0XXBqko&(^mcNQQZS1UF5J)h}Kzbcfo8wH6 zE{_J=rd^EPs4~Vwq|53lAKQoSDrd)JF2oNj_=|ud4xPEla zc?apgKL^=O!{ikpQ-L>&!<05%;cFyKf3f^-CiOiu@(p>{!H zl63zV{9s)H-+nBL7G!imHL8ajxTm|rLCzWUHwAA`a09{^7XJGyV+3DwtX^3c;Q_T@ zQ66w`&4gUYNEncSAz@%wg}w@EBmE91v~oFP^@Hj|E1w^h>dgd6ty=B!*Q8(3P<(h5 zWKG)T#~`eNsi$6n8DCu?wu0i&G9yQrE11rB7(ua6yzFu@chta zn|H)4X^CkP%ijH(@Z5LE{F1u(sv@edy-$`TcoPWLfsr1Bh;KX9iR{^SJamAiW$V9s z)hMpdCdM4{{zO(zy&x(@E=q>^&UC%0g;Q4NZCf2|z(meG&=4E*=!8OnrNx9<7D8%+DVI6!j{eKD!?_>$#+*jE1?n7D#riH)Z{ z90isPWJde`ci~Thg+Dd`1_d^04}?G5)zJfUX8mCRi=N*Xk-`lW@SZ%P%@u;FRGrxc zU;v}70I{uGnAlcwhVO%5Y8MHnc2?UG>s@ccPPIG9KE&sea?2%#W_EcMz);zv7Mdbv zb>rEF{5ypm1@HXSMuR^7Tp775I3zW&+x#ZAApS6-oVK)pu99|KVS@3MkAtmz z43+KF`47c)#l!jq->Rm5Y}V0GbU|rO;KTxHPF;J_w{x@i$5Pnuev8S?GTT$Ok)n+t zz3{UmsNeN1cM{%EsowU8TR@8UfJmZrpf-w><~ut}W?p!>_midzDI#GsMwG^p7kc#A z{vTcE9nSUshH+A4?~#l|BxGiUlv!5xOnmV*viB+}gskk5R92bUGs=kUWRsB{B7{ou zdp=*Cb2`7v`NPq5&UO0Zx!#}mc%J)y-GPesjuq-Z)Nk}YQ7>L$TXgpcwIU;o8D39r z&o^#ai?*zpVTl)MV6k>B)-)e;f3-9c@lrt{e2|S%xO9xG|AsCkGM~MLHo~VY;=#$| 
zd*-s4|D`l*O|g+(B!IPF%-gpt1XuBn-BmYb$)WuKGTNY2x`Ozjj>ne^(VzlN>nfbFqMiDvY!Mngv%b zYs_ropBwbljPK(<6D_^-a!oU|w$SR8k-10#_j z71w;{c>+dnrJbwMbC%7WsciB8WX|MCYZxpk|=@B2v1J3I(py{M=we!-y7 zu7&&HU4qRf*IIR^BZsuL^RDd@gQah}`6HE7>moO0LIAPd0pqw@&>MjdDtotK&=(1V zK40v5sh}{pP8@=Yu$(wx^}+}XI(5AJzy7aD7Zy$#+zkOKxH$1xtUegu)po62crua+ z5?&{Cb?;T=v+mrZ(0lrwtOLEL67+EkJ)@LHXQ?zYT4ebCP};TWtPFv++v^{GorF5_ zBJw(DH?8&OkI9Km?@>@XaPDo53Vrr^dM^BqN3v@9fKgC>vtm_Nxu<;YTHcTNnJa@D zZndW%mCbm&PU39PFh8(qIyQAa5MQqlmDE$SRk-bhCXjrk?wSs|Vn=i^An7hOh~T~Q zYMi8Fa^HKk0L$yg-O5|ob=LQw2$<&FflpvBYXJ7}SUEu0EtoeUTGQe zeL9Rb4x=3Tn>DcJXU))o_|oW(XEXss5)BTItNs4exd4$IoejgNM!=Y+2l8}5FsAVg zV}U$62#) zi!QrCq8Mz+5=Mq0k2s4L%p0)+IE&eOV&T$~PFS(Je>{Dp!dTm-@_fpH+SC1R@6$Aj z)k=A4jK5bB(JJUMU9_;@UKAf#O70@<;nQa>DCly^5K-=`p*qT;+aiR07Ml%dKCc8! zoDY*)@7nLlG&mSyx$hm8ua5Z$CHv3Oxtf)&7m62Uegj4mT4+zqn@$CJ>>{?0(Fe!G zS)?w=(IUDUQ{6kVTLE{W-9+u(zu#{9D;g~~(VzL3y4A--FuTfk+kbWzWS;C)p*^%X zG5A*8$08~uCNecpCov=yB(P7<2oKqt*ETBVN}I(b@C?Z90A`aCvAKS7*L(IUHL_Gz z=og*U(klnYTC@xHai7N*g;2zi9F$ z?)5)@I}E_8QjNGU*{CCw<`{)O+?o)Nt2a*05nmHZ*bWV&b9)D9S=zx}^6x)iQ9!Z}|4?v*?`d$XkrX{2D#7Tf!E1lj0C7w6rI6|R zX}PKhj%jit`i^aJp{!Tp$ibM9Ct0bdBMw^Eq=X{jRDmGes%&pD+ zy}SI^n`$zodD_<$d$!(RMj6C#-ZXyU_5B^UMd$-kthDZgsb`%4XMHG77QUfin8i1V zp9b+M0X^|75mY(=K%QGLl6sOZ=dC%l+h-iTTOn~OfmfqHQt05z2!{pgS+46UBP=> ztWPepjuQRp`Qv3V=a2eGWkCfYj?V9stvp87S%OlNiCaUr2B$-v(w}*l$y|njY4G10 zQ7;FW3-VsnI<9t{g%qngo?ezSQzPHAi~fXQ$b?bCks9$ z)8vgvk11l;Tt6+O$dX!USDuts;kDWBVc}FBix;45?C{1Rahjsh=mccWg!1(E^>WEC z*?tZ1=XoaTm?HV&8)*x(7dA8ZS`AU;!`-N#uZG2HZed?~6ai-+MtpGK!m_r8@Hk!> zH_%!ES^ zTzV^Vov;qWQY@Uc#E48o%M+&ezr@Z7eQa18W_>nk_FXtcI^{G4p$$O~%MQbnjFDCI zTFuVT*z$q$XmsvFf1OBur6kEH7wL|QTge5-oa%WZY$C{22aQG<%Y<_5s+lsKZ=Uu$vy+%8j+C~3yz*O1ElhPTs(pqH2&SLA(bg45^kjQ-5GPBhmi z*wA5#+$He6={QJg@`Xr*Y`bDt?BlC1;Kkn#h(!PX`VJf^t60Mqhql!Ki8zwB=b&N- zI(!J8-d{yv&O`ceh7Ch657k}N8P}mO7w>&q1ogF zWVydkraP{57*78{@E0k3I)>3CKv0p_#@=J3A$PP#{MAt&_PKM`So2;Z_ZF$T$Yj<- zlO$Is8Hwke$Hba?f0M_H$hvH996b|O}^5|$Vj{19J3`Vw|N?wDI?lc&x 
z-%GWA%qdo7aA>5|{rT)2`0v@{L-gbu&yOuXm+#li@-$xc6|yTbgYryOv_rU4 z&;@q9E!rH7!k?&7+zh>9F= zBZ5f^+TI!M3U6NrJ4Z`nFnwSMny$w1(uHO9S6=z3Dw0<|o%>ZMp6S7%TO_NkSTfmn z;!7LpONMp76x}>qqaIVM#k*%-x5u|i%1ld>R?;QdNRuXctSdi#madK8_U9?tujKf; zOyy%+!PatT=DXnSST^R zGk~m|dFnhi6|Sw8Ut*y3Xch=ey3R6nw})JhTc(u3z2(&2)HkVD^*j`>9&T*^M4+_9 zDMnJ*pWY?qLCM(u>&TOqft{O2TUCWuRpp92*w$R(q4SsE|BoZDb?C~5CU0m!fNag( zbIGDHO&ZCvzZKgb0}#dbr}gkTaI)MSzxC+NjIE`@Q@L&lAL4iVD+%ZMd&%8NyY*UL z1W7V%U5&J-ACBJ0)`|;mYV~fH&b=)3mWm`pAg-umN~`v{@_d|*mqwcw^=vIj>n7L2 zUVbYaJ=<8gNNzcC^`$Df;oCZedCv&M-8gzY=D3-GrY+a0_LS$ZRT!`R%3{@-Ac*daTnil zDRsk>p;qKjs{@}PmzA_NwPbX7<)OU^JS#r<>i)3Gnlv}RnY7D1qY{9D<^UkwsRk4%B0gOhD zO{JIhEZ@p2KbhpD$v*9__vvNd%MLZkSm|-rMF;i=PtU=w^e9W|LgJ|~mz$0B+O8VT z<0k$39mhK|qhChnQ?#Wmu8+sQlxE`eHa>gH={k{*5<3Z>^5d@77mZ7c&$vjZM|&9U zXy#JK!z$7@BRWh@*uJ7wTkzgD@jF~B_8T61~r2&OMyaLDGC;IfM=l8X~jjmR=zqyde^B@_{2Ps?$)X0~=wNpb2Jg?I0$0 z4w`8wQ`8Ly9I=u;;2T`aaNiLO73YgqJ;Wt5Wb`$p4d`6|9So|jz3gKt6?Geea~(#ZK10csUg|Ww(*IKc$#gQ zi-pjT4UV^CS>H6@*GYrnPGmr$y^8Xol^r;)wz>(Zf2L59;-w<|G&fD_dKjk^3g}5YaX~ z`=;nHDK1sX$Yc`PQyVWyQH)WkC>s?rRCWx!q&elFbY02?RVk2@Yo0!$Bjs!_ z+O2#te2k?3m>DiE*hopZ%M&YT`0DgL}8PmoRDnHhK00DXTvA)nWw zqos%3msbE9zg+9Z&EMs5)1jTE%}Mk%o|>Ij^5X*JH0;BWSNel`$UiLa-Tny5UvB{3 zV>4|AgZ><2K0Z)LV2-+hK(K7B-9YilwygS875%1o!yjut=yqK|&dflTN0JfTc z0c_=|&s0fQMgKTbrhPoutxC@9#!cB$jQKd@<|t32V<&58_T%;A>Pel+^xC@NCtVrs zM+)pSguQLfROnEY=@gGYgj|W6pW7*2M9*hQ_ytG;04uWM6p6@Ls)rX+t|etfOq5(& zH8oi;NJ+y>#<`3U0Dp=2XtBGuvkTLg<4v>2;6Uq^se`!##MvYLZ_ssu z*}>S{0g^XRpe%qQsbO^EnYumY1q!?U(Opru=!TrQc+#tBct;^>c(-{9GY zMfup&;YzXRlaLD}iOnwa<=vFms1*Cp3dzD95gqTyX{e4V{OIZ&aD8%xae#xfg{^}J z?;n4}5Q3cf&+-fW`EU1U3(zwC4Jb#MqwM~Fh=7TOKCpGAWD5!f6haZWB=`S-W##2Q z>)KqhT>BaY%|QU!sqF7@h4bZb%~-~~c_Sj_M_esK*ecj(z&;}4u)xqbwz5ZyiL@oFxN4yo05coR z)#5YzPg0((zKG9=?1q;6p7j2)6AQ6(2Y7~4mA7!ka)pho~ zdU;3Rod8Nh(`=W#_4g7_i`>j|bKc>G+cS~#n3|?N$m9RTG}vq1j=;x?d#de--IakT zk=I5=vPZ<|DK27CO<%lb z5#@|P2o~^)`T;Jyn??bo1s)1d-@KMkAbuy#6wfZ=d$((6ZTS{q_t!H!ohrwVJ?-RwgSB1c!za6^MIDdZs?U5OS7(L$$QnhjLq*tha->j8{f2jT&vI;Z 
zv2>^ z*AP};R6qZ=j{dcY65EM?ddhz%Fzms2+TJt-ML>%Rw7ed3B=OKrKG)7H5#y+P7YBo#?!x$Q7QC_UhAP-V{ueL&ifqv!hNA=OvA;RD4@Od8Rep zcN#BN^ljVo98Ifgn(Tpm89Vv>M=L*+*G8+iXPv}SFhNA)E66tqQNf|NoGPmYcvxR) zxQ@Rm%#Bq_@f*ORfa!GSarTxkg*$t4h@*?P=@+GsZ~(9aFrVasO9U>iuF?v;vRap5 zU&DBP|Gh*uhe%k8JmfB4djb(K3@sW4;9x~2W`6|>AV=x#J&?C|NUwJ>C!z$^JHY3b zJeW#~BIP&t_VOPbi*KO}6za$`Rd_7PpUx$Ga`@1hr;Fk5jt{TI3Md}Wu6nOB=pHFe z$3hT;Z{0cTZjdmWOiH)G&~|*weAJ2Jm#AytyO@H^8$nmLa(s1)TbP!}-2{aL(==ah zQP)#!#TOnuAvmwJSu=G@rM;7ayJ6SLi-hu)NpXov>N|a=Xq+%6HjuUtx`XFXes|9f zMBL)^AY<_J4?(7f7vSf>8><=0&e6pS_HJmcn;{Wa2uu!`o6dFz41oU=4-R$;OHbsw z0#%Ru#^AUAzk*QS7*%Ajs?-M6uc|> z8qCDmbEwnN!)9PB#lK->aD9@E!gv z$D(?{izeqx)5SbNUiB%0rMSGUq33St=DKNlSM2OTwmkt_pF`uG9Cx z6GC2)sLyn?b@NVGk5S2RAC5h`=BeE|p+3v|-5dG56G@Hgf*|dC-d^@C&dX@ZlS8)1 zV>hFnis+SQpQ=K)x!bX{BgvA8q=uCjEBDw60~7Br){n1NP83X>FN$n{C5Y7MK$?uZ zHG)8K1X0>0P{%{wxv>l!oCx>zk8V%H5FNoev zJ97%7FVf4EL-@O0gr_p1Rj%vqRC8j&8rY+=&Se?ZocHMEsBLO}CYTqM6%E>(KKJKb zVqe{cY0=t41yMdWuE`$puh{Nih<|9TnABu@`59JnHk=Rn?+fb3`g@)}b`Qq6KAoJ`S(6<@5q6A^HxFu zj!mWn;nkny1Ct6vQVDNRkskxQPEzKEMg;N*tld+_=Q4llUi^W&Kle<&Z?WR6%2;2q zS1jZ;B))yCr2OiafQsRTj|>Xk$5n3ipLFA)59v#+%NQ`xv`Kf%FTJ9hthgw<*79;Q z(~FZKgrE2#ig$2%@MB@Xr)C@(k?Oeh1s~s*i>yalMAcRru|&nyVBIjz@sTxle%=nVwz z5b76FMtA1T6pyUgb5wh9y7k}SY`fo{m3>`oFvapT`TOMcl|t3t`0h$J+I7Z(zP8io zWSSlMB-Q7aP=l#?QIcEIpSRs~a}5AbI$!*&ozW$c2X)pDnPI$m>lEJWPXzvlH}Maf z@Gv8cbRHQ7Y+6zTFQ~!XEJ3=tx3?M@U|EI)&@p(1O2CSO=jFkVCpqaWKoZo1o!=G! z?si^iCpT+A-~VNx{6j|2SpKSE;?!3Q8p6bCX&V1b|1uwxwqH20dm*InK2(|4da9m9d3vr^`$&pNl(eOTp zl;bQanTnaGbbs>Obd_+DzmHzGt08LCzAF0c!VJT`G@5X&>g-2(Bs%RJrKiu>RppP? 
zPK7rc*j2TiJ>NN4Cvh&chkOqEZ3lEH)?e~ERiYZKd|p)x1jb+Bs~(rTI#ealMB=!0 z0-A)|x3JGEILbKP{`{D7SBVrXPHAnCf>0$r_)$?l;1P8hb;CxM7Zg`k4j?RX{+I5` z-P+O~?dl2@G*>m(G=}c9H3VLP-07^w{~Pb|;qO22TH=BRp`59kE0Q_@siUsW2;1?` zH5$hF1anRMayT!V4#1+P+o-*RQ-${9Ms9ZY?yWA(?&L^+^_Swzm5&nB_p5KObVV~q zJV@kNr1_5ZG%2Pj^i1C{2X9+>pfg*_CI4nyA$y_~bwk^{f_C?W-Z|=?li<^Ajb2|x zC1q`1=t*flVKY_Qi0iSOe7!>3$a=(iN5{rIDC|+La%b3CP3I)2I3KeDO&jRn!HQVs zX*~7SM7oIiS5i9_$@-Njg^24`lLAfHR$EUXZ}iy*l*yf-wLJ#_fa_bURfv}*+=qDx zrXh47*xKI91IZZ?=2f%yMB56efFb$&%>!dVtcp%Py-{iDs>KvCK&H_|sGQ z;^IUjB;_`-_T_zLL)%GH8_r8e&w~sZsa-30^P**xzD1liY#@j=&YJ04K$HYv9iOM9 z<6Jw0+#k%gCShFkYj0`yvS%NC)8=u`p8FCtDT)!uB8ggt zwSQJ~M^kcqx*1VR`o_st#sJUo6J4h6(2l)$klVWoyAx!Geo6TvEi!UV$B%5KUpj0C zu+A1vG>2K}G+H4hlJKA5y_|fFYQ<1GN5(a_AB4##Z^979N=`(%3FKfQ+RL{pb&x#skX&}#< z+~f62@$upWip@bK`7SBn!y#kg2)pG{BEviSiwzYdzSFpt$5y=eq!twLZ_gfa`gQ2# zY(Nkc0(9(HswxM3lxXn7qh0oGB%oo(FuyP#o(r#qwKLkn)(e>sA>kCiCq=f#a9&zF zBKGin44;ZDaNnyWRc8AreTPp+e#_7Hm~{$;biupEt;5<@g!<+0`oEaed2NgX5mP(h zGG#snZ6V(l;yDf?Ik_~BpjL_knqJhX!d05YhUAR&;4b3`$^ulq>tnil=9utOCyGE( z&66})XPWToN-pK!a8v7ix?-F=P}(M>qOV)6yIGbiHmbOU#2x=nx ztZd1x>G%_u+%l|*4|Qyc4aKV8%B#9-qu?wZPs*~l+fT}Q+c1Kv;;JKl?hq7~2MT+j z8dM%_tp@ykO%-ikQ#4uyjUmygqTOJSqyp1h7;pa@*kYd|d;zQG1e-~CiYH{sAo_1a zIB_|J7*vL*3V#CLdwy^GVI(DCkGPazr+nHe7*1MWt0^YMtCZREEAg>?{8Yxar=Zt- z#VH(R@Bb!*dU*llxpMV+EFHqpN_8J^u=dBOdDnoft&Pzkc`@Fl)60V5m;pEmizno| zEv}fo70#Ow(I9wA-=2HPHBCoS=XA}DXXjD$*9XfdMyuPXYIzb~{T+lScB zM<@Nq<>No4C(a;(1-d8%s;fCe{;ioS;+F$ym+NnI@88OtqDN%GSL@S2C(aR(>Bs7G zNIID7D+o{0%PHE6F?Y5)sTE!FG>_fWVRT9J=2$87PM_K?Fr1)JedIBJxaRTz1aZCL z)0_##c&Zx~r1i|(unlgMMRzJX)Wr%HpVNphHW;k_9xD*Kd?oB<#!YeV-miTMDZO;g zVGll!l;^O^Y1@@G-^<9)CFplAY_qo+C85~NYCl`o(OoAY6pF(?2Tm}+N4{T95UnLc zLEX}zZrya&84Jf4<5v`^IWjbI&B+pTma6Go|0rbG^jN%i@T5)l6_} zj)_53>&-yw36=?`-TV1@^dWAX=kt!gREUK(cZL-tB3_&KdPqwhy;*Aa)!)95#_WX; zO7w*nSGA5w?}^pz>dzxi6P!FC{zkR8OKvy{WjJHMnSuL9+sk~XItmS^!WHwj83k|M z9z9CN?AF)|s!#6XkYMIXiA42rjh~;wJZX>-UguvX^@vmA*s*q~S5UB^ 
zyaFHU({vMy==C+`h>3~A5W}q!y_3Ipd3OR=6k>v{Xkw87k^gjM*t*!DT)jbA3Z1Wg z*H@$x01U`vkwn%Qchi(e2*q&wN!!X+Ebeb2q&tcGfjSb)+WmELhOg1)`09l&Ta}&` zR!wd)JALeLmr;*z)Sl&=x6BaHVc3Q4xAz>D!ZXg0T;-7Nlg~x;;})vTXB)%h>+_Z2)Xj@ z6yur8$`3udOT<`ATblP)nz8-C0+rkYuCMg97I7E+!Z=0A4u6Z<+jiRaGQA+WWRW#= zxm5Ou31hg8vDIzAPWN-S94tI<`nGwDUwsjw*Bo&+t3WZe2)mW7d?xd4)myomX=&}a zGax_J=2$`sgnvJvw7Im#_}M1`%*a-p$ZO0bA1}`}Q_TJJjzPL?R1URGy}QnzF>qv}ga z?J|+mQQKoDL`Jk|YV&Dd(O2H}XUW5UklkDL-F{Q!y>mZNjQg(D2!qGh`>moe5>L5gMEfAwu)988t45f1Bt;#_lRwS(y!16+reAqW9@ zz)J{?Lmp=f9yE}Wo#B!N6+;|5$o#Eb;l54!0>fBPkx=}R2y{k~5_rKuoU4s^!D%cu zOvbl=eVRUFN^pT2u_1KgzQ#)}e_FIgbhcTHd;B94!;ol8W_hKaq8mNrh-Pu0DGFlH zODv}#NxJ|3QcfK>5f-(tq`OvSNj)#`@7AE_B;*iWk8fXv*m6eR$~jJ&V?E;G$(JK9 zEA-#s@L9aJC*UeXvTk*bI6%m?yzpD`_%Fq&LtRaJhY%9`rqNPq=J%)c?Baosk{J|y z8)Ydofy5rtic^41b#ra3COg@bV;<%Cdk0l>(Ouz4CkYjge zmL#+k>|xbxd-p_}L|E7H-DIrWT&7=^^^FapNh}J=V_u=3+=u4Amp^t4?~OKb_j2BV-CZcB@rr) zZ*phG74s!URa~==9F>?@ziX3f>cz^?c1grw+b46Yb7--MwO@3ZvAUea3>RIU#Yg}Y z-uxFt+#hAb&{zaMRXPwWW0<_E%vZ&klHIlTb~Vu|jfzgPpWo~CIX|F*F*)O1CGwcw zn*dX0`g`agi?#+|gqk4vMhD)R+^uaOd-EJ}skAWG1gn=Zgy-2Ct0LpBKYPew{A6EP z(iA^4sT5*!9eyCGhP05O$&RsD4TpWr#E1rwH*)RL7+Wb_V!{(_@0Lby<Rl{rR_qw%&P=Z4tN#ALLH2+Vk-PMBeE+#E&Duqa|bf*nY^&S#(ykmoc=AsSrX|z z5taxg0*&q;{*-@zU}m-`J8NNKP)>N*xuAFv|IfZ#?q3r{tbZfy{y=9TY;#8|Wt6pm z5I?WBhUUNiH;m-`Kt-p5rZP0vTtRHki@^kec^{skmKcuGKf2%$j;{0}C1Km4dZutA z@$%Td-#~qJQC8#>LHy^tEr|3EaV+3vC2|;)Ox+}y5gALY{PHYezJ;unzPs}!x}gek zOde=2(>b5W8oB>D%C`PUa5=8Nzo(sezyIP73qRF{6{d^WR9STI;@k5_=f0`1sZtYJiUcc8J@ z7Fbg@Fp)$dKm!E30@Gd$((a#Amd%W#53FlDHX;{KL+iXg}Uhs&#YlZ_;iHjwcbK=W|SK9b776+!==@3&DmYGRzbA_ zkD7JPaV(Q~dwguKo}Z0B2B955TeVoM zpPs8fTgTeaM&Bm)h=W=PWN&$|w(qR-i#14KB|#g*IHZCng7hTaXEJx)tjuSG@*JX>GTLF(y=wW1A9F*y=q6Y*+kyw&F)8wKfSD0tFFlS4KNW=eFM;s_=P#!#`CNd_Xu;SlkK_Cz2`*aIWttuv=UTC zt}Dh0Xnryio1IP8U$#xV=9hSZA#{Sr!G^ATOO3B<)sA}pLakRk>0aQtdAJAEYYaQCo%#s(9bP&3`<-W+; z*qz@P0&$J)AhL45k>;6_`QV>t6{UFBgosJ^g+)=u7L5z>WBBypa|8_Gf*+4Qu)sIf z*U)vV^}hY1vZ&M0Ocy8sGk2#x=p}Dm=pSvs6LsvlH%)Ds|k&J=4#pY1o2~_+^huez#b1 
zmy-H&XD~*Rk9hR5!=#h613ppKLlK6Q*<*X_Yqz-@h$(Jg{NWSC%(3^GDrHOxyN~Z7 z)C~(H{a6D0e--aPg)FemIU+C#8!s@in>qZSlm*F=KJeP3c|}DVQXj5r@aQV=YF@si zfe}>ID>hyvySJvjbt5{-DA~QFV(QWnIV&zEbhLeRU@3l0%>CH9ySyJmt1ksIjZTwp z{Mxu}EHa_c<1<)U%a?{`x{GbaP+^v&{flHEvirr=_RjV*w`zhm8=c)aD4ZPYi}*A+ zOqFa|v(;L)+Nd^hGcO4#S{zxF{xnKQ#!8jwNwikZRAb-fLoYqr!ld38TAb3U*kmwI zjmji2dV4MB<+T|dU7?AtXT_J-=J|ew5}FO^XNmd+h^0nLg>UzGSe}{59bmMj`H?L3 zc67kIrEe&2oA}-2#RgtH+Ey8}V_nV0d!HW%C}mzs@x|iS5BLCIHv%*}a6n*gjU3bP zwCA-&qa1laWq`RDBB=xWUER;ezAHGnMl84MMcdsmgRZUybaf5CrXpS4tadz1Km%m4 z1uSs9BH*!@UHvSh-Gq>b0T7=_yZ6elVx^U3&l)s^9}UhzhUE87;cAWHbV0G-IY-h z#q1($!8gpKwY)FdK0+7^lIh6hD>=SZDN(5=RPw6wOkmUmXZ5gLw@?-Iimf(Iz}RGd z8L}}*_meM61|Xz4n-ZA~U@5`RMXA5m-XHfBOH*qLUg&V|rwmHNBuNbu%>axp%$N_J zY;x`}-$RIe|2|s&Q&i=Q7Ubh}!Q1?&_%aQAUeqPhnV0F*Ao`EEukM^Pka6bm8%tAN(@teoLr}Jpi zeIiT;(_BMqJd5f+_d{Y9*m0hx_x2pXR>(!X?2LnL9s+$`3@qkAi2}^&P|jk!nwqMR zD*_`x%!!MjAOn#WQ>SReh8go+mEm;j^X}Z4SAI*n4|m-MNp~^#-?_a_6#k?og{J19 ztom}N<+B<|JHF$oFx;)31lG(+OZ8wEj2c(;q-JxhR=?_ic&0HIdTBtoTIju2+0A&+ zCnPn!HEgS}f0$*M>ZCFqJrNv#O=_}Ubg*w|Np)Lg(8luJ1AB4TVA1E7bdshbHwO~t zXB|&AOJD`oLXbibL;-p*(FO6RWGjbGdxQyT5QU|-o%&%*^8|U9@FDM#-o1%Uc$c)n zy9CV=d~HWivVJtTuv@ev~RIXb@1x3DZTIu6?ZJ>ZX|jTU#K>U zc<`1aU0!?n=VpS?=28Z^N2kw&qr3EMAWpO@ju4PLwjhNq@VlxEp4k}r#)886PkuE1 ztdgje#>3uj&?@maeuI*}Tk~-xKRf*DoPEpC^d#2ZI zkjr`?9wz2r4O9<<2QoyNB~?b%k{zca$459MzgS*d4x@^{PGGge+GX|D(o4u2bwqq; zCWKR;FBiWgfr^48?fm8MCvO~iB5NeA8=6x?D~%tOB%2JWl4HW%HP^o;XI&Hzw?g-y zXJL@IID5wLsr{wVtfDgi=DtUPPx--K>9&_3Xz3*VUd+#85ll>>n}#V4ZJ(^zZ#N4Z zQFw8FDxfEZsffsF#(R_D8*R&;I39DEpckq~8{(;~8Jr2YQdwX*CO-rsh z$EXg6mjAeI)SV5*M2^J++n8QRmkvf#@c31hk>|OjC8zd(O5;{h_s`AaPYHCm^;{!A zxSAR&#=K3U(uV3musS@eK;`)!;rBvL*>GZ7Bo45!RZ5P0iE1d$9a+QT5c%fkFG6r# zgm<=g>$_befqR$>n}v4$Rs>)AT|#wdqk$WSBJqT|vSm74)=jMQ@f#f-Gge6(b`@WL zog=kRZcLmgtgW}t93*zHj$tTM7q<+%WP4n{LPN2utjDK=3fP3ksfmr)t+Ut>!yB33 zHa(3sZck5X9G@c3^;%wNJVlgvQY^ARQKFJlnivKJ)}&1M_H%o$K+@q;cE_j=yOcN> 
zc4|yK$AP<&r5$j+JkY$3R>1GILXx(A;VXLZh~^Lm~&eN~2jzHjJ`^Kk=#!{fc18HcI0Qg=fsAy?%@rpsaCgBj0<7IM-o_{dKk4a zIff!qGQ1prkHa24dwWZf0Sa8;pTKeZbgvsKf~5Q%h(@{O>R=};&1(ZUSLFTuFW^?XZ#+M#HZ}TQIK`A`fB1ThHsXw2=-IPDqJ(S2pe@e-yJ-@*R2xm!6O*U;6jpqTEp7DVF z@x)O>QjL+PW&SJL{n`|3PkR)l%H1L#MH{5`SHBQ=Zi(v&OvPU_xwFYe(FFaPRvn1BQ!1hn6v<7o`v+VkK*rvm}Jyc?sg1+`)i~>_h)FD8iXT9yZlDAEJRwgZazW5X6xN zJGLl=Wh3m^_}sUx>-P8&CuOw2VNnXXlT#X0nb>>%;a{BdbxTrSA z6$>8acr9rC7N6jR`b!p_^jzV)_~oeSdm*%E<6?8>?W7{4Nf3tT8BMimgT;K~`R`eg z-kki}$KMlnXRIZA{1PfI*mf_Cu{1Ob`o)&?2={{WBgQj<@DkTu(s6QpZiB7TH#>6E z!P{u#W`&X^<;R(kXo_4xWE>@s2K-nN2S`fy%Bi#M}TSP-nF_j2kM}?yZNyTV7s$F~N+2oVI`;V{o zkTf9V^3y=j0$mC|FZaBG~<&BJ8$oD&5@6wLw6VRObwI!ju(I7+-~g7eqO z@VJo`Z65J_pZx}8NK}Q^{-IWXu1ak@IQ?Y$=)l+Nw z5h~n|`zU-%Q|?2jmuC%rb$O0W-4w~R5`Ubnikk5Vn>p>>9I^=9+h^QhJ_U~DhX z4}6}AsOzpGzOCVn^ARTNiB0l{b>c`3)S*0EC9o=^XvGRxcxkWr>f;SD%M` zb)1@8I1_e}m!L`r!0f>MRV7;oM+-EsvZfA(w){`ep)>6B&(n2>^012J>;+W0|4QyF zoO%tj>;f`w1prN~)=u0^0mA%OWaoLwhQsHbI&f<^2`M~ zQa-I!*=QfsACniJqmtPs-T7G3d8G~yKYMo4%o$!CM2%Gniyr?BX`^CzHFB9! zuG>9Bpox5W>QVy$%p&ha(G}$F8E*i56QjRAbusE5hcc%GbSa8kT z*g+H#)hYi_*LqoJ^oDM-k&*Tfbww@&G!MzWD}&b6i^md#?%A@&_TkpZ4`j)W<)j+) zmJy?_Mw5+pEvqk8Yrs8vHaVf)!s$fwHAvYrzCR)7V3T66gNPzB1Og==^S!^^BY6Id zjH_QL9%3K;g$KDaf-}1_kfySOMV8=@))fuI$pf!GNCL>g4_=f#AmHG!YmZPn&8=PR zF{H15009kQETZrbGgb*Bj(`2z%Ha^@g9IG6yTX-mV{gZ|#;A$n*QEbD&)t-@|<3LkJtvqKPiuvmJ3s$2d|jX>ptEp6Nu&9HDJEwWlBlx6*3<>@Xg@@gjY~`@ye-PlF@?x{GZpI&Y|xvQ$~S9tUZ#+m5}4Ba?(d5 zAm{oW$nj-BxLo@PshzQwz|ML@DgHMMAfor%oEf+-$W#ct20-srD>uQ+V2w%kGT|xUhSYMZvHBsH(fbKdA6sM^B!Za{PdHZ zI-xWMde)-PZiZGpZ>yiPk*Mfa6i0=mRiKxYrr$dW1XzRTugYQqm)C~Qft$~2zMDOA z4i{5?3$W{>pO$6g4m% zS_j6fy1ahWIeNd=kg@8Q+Z@61_wLYIjp|BhP)XR=f9J_jeq`jD)^R}-2cLuC+?47x zNf8gd=8B5`yyx?r9M6Fft4EMYhA({M!`%0HT7&e>Phn@OUi-5fZHt<=d?0tyOc)Xg zt8S>K4}JRSy1njf3=EjZs3xvqML2aiL@37%x7 z=T9Sd!V6fGgdaF1#=k2x-0r_*^a|VYW;pV8M+{L1-U%)2;UR(c<~21%nPHCeKe`#X zR9}!zIUg>DvJod+B^@v6fF!8f)L?Fe6UG+`PRdICkfVb^E8;PJ)){|G^sZr7<+~$! 
zZ?02K&BR!uT#>CNINsoM#z^+!3GsA8&D>&}xDQLZoZ+!apuftzxm51uJ9A`EDDbDe zUc+_ba9gM6i&^%zn*BR>Qu=dT`b5S1NW?!2yqVwZfg7PHg&EC7R6zyY2<<{QdU3;k z&0N*Wn*7Gd3UrO>%aGnXxp)alL&<|}Ikj|e0@;@5fwFa|%>CYZC*DBTgDjChZ^)JI*PbC0m=tA-&EB4nrLH!Ax67{lXm6O3t@11>B(#W1YI7Dbk*mF`dD^|;!wO>9t;q$oltgDeu zd9nBocV>)$U`W(ptOLvqSHA}%sp;}`{%+xAx^;{6wt2BHYCFQxYON#@Y^%FWepEL7AIA^iTzr3kmoq9*v z*tou@ixuSGU5>P_Z8)-lMTC&kS_u)NNjrU4$VscSWPHijUNEa!cU$ zm3+e`HgASAx>Z|AKX7ZZ*e%V?8T9oB!_|J)t?bDr3vrlIVL$&lYjAVv_Sk#Zd2CIQ zX2CeGW8a7=!(&gHJ#oH*?HFz{*<8O`z(UseXmRh=cb1PY?BNT+N>~VZ3(EqTL>>5r z*jc-R0nD8jwCulUN&e8;LbM1b%~J4%fLVgi1piPm9->6A{_KWeXpBa|##_5xhb##_ zV2J7K@$)67fV61@xe&xKulh=*^?@H34# z-OMxb*Ip`Ur*a6Vw(!l#{OzC?6i|x;ZM?3)U3IZUTRFqv1CntpRUmi+qelPZM)!^5 zisr-}Mvx&ejA~H)mLbefChk82_TzxTto4TKnqHDcs#QDv>m%zsY7Z;1zMGVkT;QoJ znEXH(y_RP>{{V-7OvhHXI56(oeH>amPUFD7oWW~-;S$0|@V1_N$(QtC>ze(uThQEk z!?U9|&c`3YaZ1aq@Non|ko|(eOjbqsLZNmR;~O;r8hBhALNI!RDSkVa`48uOU%Bxu zp@~)#feR^ikEUf;6Z0xK$<7mPB#e~v%g>|y+Yk-q-l<)llfElIQ_4)@#)V225v|m(6K2`?-td0oR^Xae?{gG54A1X@Syw^M8Cxfy+4$wq~gfLTx{sqh}>u_v+;Z-4mB*4Vm@Ev9u*=itJ@UKSe? zV(S_C$^ApZY{6BF(2}dd4LX@xE# n%aclTvpNM%y7;aA(0%AKgUBHGS%+htt1zt z7ou{;B%p4*c;eg`Q+$vdJ5hx+_3!_p}cLoPYbe6c9`CBi%}Yyvkc z0^w$=oN^{zjs zdSUixw2Yh<#f?!eHoKzo=WgNUC5j?Bk_&a8+gYllJxp_?69UUO<%k}f%3eaWNMdqu`Vq6{ z2l{AGke=jV$LnHeC*+I)&$c7e8@V?xJe88yl&)Y! 
zUf(yKma&(E$}UL`8A}IAl%>#+LdX&ZV`*j#!zfE7LWG3K)?zK&pbViw%GzehTF9Ec zq9n@i{*3cIr{8($zxn%qedd0i=f1D&eH~U;!A6Kyinegr==ul=9wGHk+fH^o5n!=m za{Nl3rf;(OTKx8?orkAtD1B6qdg_S~W@n1l?dFYT5?s(`NImkK0xSAfE+TaC^Opb^ zHartdf?DuGE!I@yI8gxWM52%(kA>*r<%M=2z#0b;2!m-GL?j?WEX<#NU*E+#_JRIn zqkIg}n$H;NG*{=x))aC}tAqXs`}B=Fn22``8=XU)F4R-J4re)~CV zL9M<#biOTYu14>#z4bx~#m;Rt?tNQy9^B1JkF#$tF#cB5|9#?yQr@^50U1^N>dfP)Q$$|foKEY_jz&yf$_Xb2+uSwJXKX+J(H|NC zG4%Eg)7XX>nd%QX%ca&}p69u@yY<)g?TYa;3GVU#oJPR0DMh=FDxP^<>us*YoKMV0 z({LsA0;>oG?ii(!-XYajrT$qG$HZ2QerRL~zAm9SoJdxm6&$@wweC+U@chC4s3+04 zAlwK(p%ELaw1SwFK%b3an4?53X&}4cfJ~8T(`uwzoJ<>WvWj$I%8oDnNh0kKZ z?fXgvIM7b=w|@~F5e_q1``BlkRBH+r>|Md>??i%!1QEK8t1&~2cv_w>P=;7B-ln=bntT5+V5GS4)!{Tt0_uA$yZJ)8W6 zeL|^b#>P+*tO&^dZOd0CoAEz!&(@H{+@?2rIqhR zoNxCR(x!28zJwe@j!GWx-n)Vnb3rRSKJs<3VE%rm@Pl8iY(;L?o?)3==LmbO3+O)M zQ!@tg(1w%u`s&Z}JtqVLe`maQW7z>N_coMn3@M`X_sn}9ghw?)e*>!1KB;7y6e^+ zt@!cJwZH7Ebh9jWBo(7zUqzU7Q>)g$QagaIeAYSbv=FEK3X%hFiYuU{Y5RuR{ygz< zlk9eDf{|+@CHIVk)yYAil5B(6p8W~E7eqqv2a}{iiz=}%VrI?PKf(*(RB~wb-spVm zUL8xk-^&|QW@%Eq=M6gPdkZfIiJPGKvR)-jblUIBeqZ-AL-uKG?p#+}#ZlQZ-o|t0 z*BXNoce*pcBp;&&oR(L55qc8>ojB|_!N1XO6QUtFO~5A`O64t_kuyb?;q&tx#e14q)u!`uRZCCbrQ((n z_KeDlC0bX@pMSKiN@Nxncg?kW*D0Sj6)6@7G z%>pieh)!Jftfx>B00JZ@w)6VWzG}Ls=@^B&v#?i&h|T87OA$3MAC(G#fta0Yj=^dF z`LyR^Cc>0Dn)P`ZhAY@O|7rzu4CTL=KXQI?BMV) z<`VN}n|ovSZ0fJ4?t-HC*2BVSKz4dV zs>c?f8!zs!`h%8RfsJNvyH{Y|HRHF_!S&?yJq~-K!GSw!Cld#o$+OLY`%{pe9>PaG z`wUF{wcOzreh+X-x)9p>IPy|sH#usv3u^#;Dki-(79q@wE2VH1o^s(TC3#DP!&`}g zg;;agr-$|RF~)g<7zNbEz=*(+?ZKMiusZBpz0n|gln7!>*FnwBE9%8Tf>xMGuxQ=@ zHLbMuDvt321ko(5#*^1eiQL3qa*;yWh4Y$qM!9aWTJrGy`m9$C#ghHZIbl>~TSuV- zY=GaVVM+1+Bb(SB)nLVM*_7>VI3{TsnL4#7o~D_1LQEfj|3L-i7Io1sf!p|9Ew5lF7TfK8pPT(|qUyUF{X$q0;56+)uh9z-B57}?>ixf92Ln@1N* z#L4NBNhG2%;w=0nhZGtxhA_GQQmg4WJ%ZT1t?T;rIw^_ONb$tgNby>=(JJ6CUj10Z zIkOA=mkBQGQ1wGS!}+)0H{U{!*>HXky9bb z6Dunq)_IJGb*SG$s-Y?pP?a^G2YMFAF-HwgXd3Iw9XaQ90@wyXee*KGIWum3$cLlD z5HfYS8B6vRX$5Qx9%>M+yn~Qlwk2yh3xU9ef^b-YD+yuVkl8%g9lMA 
z1_IU06Iq?D9QZO{detMkm5tilCnE>Yp?ZI8P*;7$M&p#?YAd$e)`+tqOF0F;8Xi2V zm}G9*U@SXPe*Y!1%G>=%Yux9lGZRbNeFHbk#i-blz1H1Lw1Okp7U?#=yS!&Ty-Ji+ zsFuN69f@hzKD|zueU6=GnS;1AH(T&u8WbJke&SMINWlr1LC6zD7Ns@YB<5SK5D*}` z+0cgx;_ZjseaA&vS18Q171U3mNT1)_co4PInkTzGg^b6kD9gcsolLSN|NS)oWxmc= z$oWmETXzG0wR8d)Ex10_RgMI zw40;*e`B@lH2J3B%!;hx%LO~iu5h6CNPfWnJv#)<3LpT%1lGalf9modJah3J0bA6j zX%n5|910vHyYp#ee%cN;4>qYn`5frZ&FxH?(nbkgm3p{|4#i`c<(4!?8V%5%U{c|6 zd(6+qMZYbn{|fDFi8=?SGp80+glEn7>PQpGBxc5(X*cdQ_=Kxi7@_PJV&KLQ1mKfN zkuhe@@E^Q}1<)j&!`;XV@;>JIEgOi8NQMfPbHE#0UNJ>@V|-7id2TcYQ=uNeRZkL~ zkQ_piryPbtaMdD_7}4~AhH84BXDBP6VPCX}YENGFC zQ?O8WQAhC#f4D)4JzcmB9#vs9m8E6ft|(L{p)Cq+);?*rC*RSe^Rjkm&a_peK=S#l zZ%FcP@cz7bsVzIOvYmtGL+IcU!(fQodw@v)Q+*+r)e+^ZfF%Duafst|yPI9c9t0xhV@CeHrqN9wD>7q~g ztU(<^8vL3&3_asxnr`UhWM?_XFQmUSMzf3t7FNILVL#hdbU&KQPjT1k7O<5KL>fp8 zo3!3Lri#T3pTGK-%9hlMB+gAbXC;bTrjNe$CHGpj$oc*JR_S+hF58aE)7({~d)y#_ zUohGr4fE%b&3i+?9{KY$>*;+*dgCd3W|nkX*r(ECQK!6JSmCr|o_2Ag2H8UB zoX-Hz0MV@{eN5O2p6(e-ntOU!%hI7|8usjy{0S=G9cy#SrNevlPQED0QO#^h&k=mq zYbc=+5Kbt7Q4cYTO@8BM__Ct2m{Igz_rhRH>@=;2f#)J2PRS_u!<8_FO;j1Dah&P zxUA28JLtTULLH4)&++&@X@~~}^0y9ABHn8;V{!EHtmhoKOAN(M-^5*ak=nLnSNLRC z`637IWz86?ZZ)^$^E_*z<9n$?fXQfz{^{O zn>CzB3!|Oyi7(n>nVq(`y(z^VY&qa0dfxDIO{poH`%y$MxBa=;xxno4mD%s&?@e)b z3#5wI!QY`CvhZJ8leVu*AlYLHhHzc-)Px%)q_;VEQ6St2#)eqH8acSwk^xnh`b$pl z{aCN@the5Sbzw`xUWS7APv<-pzT--*#(%QRSf3%x)VSU1=kiKb|Lj58omsC#V+-x= zZb4A*hAmffD^zNR_=Ka5s~GiVyRjGu%Y_Ae+A2Bhrm{JQq9Wo4@GGf&su_A;mwUCkxmH+!FIJ;$@Tlg~NFbACAxiNY$%R?QM|-+Tu& zz8C2meC^A^=Fa{Q<-V|l;_5tpdQU>_Z|(;O(|XOB8txp#r63d|Ie6IOAtMBu8=9fn zutk79p$M=?@$+pNynhsZ%QF+92a20%t!{BPSmti2u|m%8U6(Xi=8sm*0&H?84Aq`L z3BT^sxUHI5q?7LbiDiDqMRjNjdu5!M8tu@7PUFsw-oV~#Hn=sBEicpTz@s;s6f@vz}19NX?ba!ELWiD!SZ*BnXeQR>#IJV~hy9$IOZgf?&De);$ znwpMq%66u2*-p3RteU>v6%LRDC6q~m#fNQm-inw-%mOB2e$3R5`8BhH+{vtD&H<^1 zW!aW9oigo2cV(tXf*^2ka9;Ru@NtWl#aLup{l9RZId@#@FWyj3}yVByv;pJbwT4F;pe*)vqR^AoP;*hP&t;BocDlB2`Ig=5 z1&-CVSW?F4S%z}GHmRN{v`E?R{UQ7(Jwi=<%JMMVv~$hcP)lKApQ33n#bi%XW# 
z5v#5OTF?U^Ez%`m5ram06QDKASmHyOkVZM%-kRM>Gk86r^4Zo_{UBq(Ldi>>b0pmK z&iobXR}8Q?1opqW4BmuzWt-qGw5hz6g$Ju>r8?Z{89 z9-iB!V2p~a%z=JedaH+*&;%wz9LTh&dizn)>JC~>S<=*vuBN-1Zp|rJv%c&K6fiAK z?`eAH%rN?f)i?B(Mhrg2eL@o&K__#3C5B$X zeG$X`d@PDlnUI|QRsv~xG9uVSH-O|jDp=S<=w|hc)@guN;$KjrwK2&jbP>+ECc}gok|99wQ)o2_@x#xAJ{$3qd08&QZ1 z2y{u4Le9%N3Q&axZpS5lW*(twIt?N$b>jBZ<*pzN2E1VRhbeBClFiJ^sJPvvd>$8B zY)gK3xTLD-9I6tL81Xy80Jlp)BkJZzD&?j3EAyW~XS`yD`>mu=Eoc^73&QhCWXn;T z`Q7CP)ucYJY6zzxtOrK$;kOp1#L2?qLjL7yPK{{;#&kGa15yL}DhBjuzR*`e4GLaF zOiAF!4{l6(LMrMl8PAIk?o{753#`=LRgs0k+o{aQV=y1VM*f$N>q2sl7?_Gz+vG?@ z5k@wrx9iLMa5vSfa`NBzg&#!@Hp>OyCcE^K2lH=CFPFYyE`DSBvI6)hfR6(BY;={@ z1>aUP?ezdY2Kv8l?@b9nllywDFk8o2R8@!7QH-W{H-#Up5nC*)`mb!dNPdWkc{H3S;?5PBUA&V>_Kr zSFxJHSSo;0vsg@FEI)_G6vk3vER_gT7|X4D*O)#NWI6T4Rv;`NjK(|)VR^bVqNg{< zae2fP#&S~_%Ylevngkjy%aEE07rU|Qxx!g~+f4UzAY3&jkC7Z0FW)91qXoutWP*lR zfW%bj%U3eeJO<|i3lMglvW(+mz8c$CF}6n{0xJbvN5T^@uPPeDY))x0zM_+AHbbqL zzeUaG*D>aA74Y^62KE_%w{zIfBT%=)GDVthlY_zV_@tWFTQgwn&71c@aO-*=#Nd9x z_so^wQ)pa;##LzC&E`xpHDgUQuGM$UR;TkwFmBIKVBF6boCWDgIM`<7ig_|}Ak*q$ z!o6v*%Acj-^TC4SI5hC9%xOfRips2m@;oEH0E*?QfKtimcIlZRh+?lwyvN8De1-rK zgkLEEk{4wFRz`~)3;q?`wzUwGLgNn+6NCn zepgjvOY24ujQIugjDdxi_IHAv$=SSb(`G9K@@F#}hE@CRHW-RYP0d!CPyBI<`Yk_c z7pR&zjat9v=>`%7ISY|U7aS12}>DNv!<8bd(^0Z}Nnjq{j7u_+Xr5@8C(wsl`@_H#jO z3+~t|AlpMhnMVQHmd5nvKrRoNg0XE1#->sQDhS(S81Cf&A&!7?RZh9rGMc-`I|S}e zVv(g|p2uofU&XM#{ds37*B2*hKwrgxD#+otH|BdY+%Szuz;ZsS7WQl0fI=d^G$i5z zjPFrs#H!dfd0Pr?FlNb!{H>(P9<&sl+q%%|hxdUK>#skE6?}iPcT9%wKb#KsmT2`p zWeFMH^iLF8@LQT)3OMn(sefF}So3gRy>E9~J?l|$LECOw9b3T#A3r*~KoKYea`cpD zj0{*5k+Vt4YB1jro;qlk>}S6OZP=sx}|KM z5lw#OG#Pm!FGSKtG7lnrSta#UNj)1yrD+bvJF%Irp&O8wPsMv;!9S z4(nN4>tZuEW}Yc@<_qG#n?}aBfXV< z^(Zi_%F5VKI4f}QMIq9AAg!xQC|K*a&3G>dY$F%+V1B?8g_(ODLn@bN*=%`Mjq0ly z)iZi65)n@n{OyIo-xe3^A9DaUG^l_-EZZh;xEP2eDE&fYrv zDpx7<6bMd%;5MB-$k2@LdLTHvZ9yK^;W1f?=lAr`%2)Uc(w43Lf35+ zU3VvBtmk_)@Lapw)y=LST6)K1j%gYl)^n^LbGFunjHz^fit+AO$OO0G z!i{a+7jFMd*x2VpuWtHq3OUxC0Kn6o=SvQ`m(gsp)SdgvDHPea&4e!poH9G#cY55C 
zLaDqkl*;1b*W-XH15lG0lOUCt1;8f}!J(E5)IO4C4pUkE{63&cRk;GGD3FQ*scd>p z%+ky?K`K_?vRZD>eH2Q?X&D`Lu;XWp&R{kYh&R~*Cq+Br&{B_sXgvWom#7ta=X-Uz{@W|7vzDF)wj|N^#NYh)XnQd8=+^_d6U@`UO^&` z=|2nbSX*=T0p%io0S%CbB@x+(COnU~36AO|FDBb7Yd{1}HeyAM<4Lsoa{7KQ>3Vch z^~a|vzJwi;r)Ey+*dixY`+0rvWWHG%oHp;Y7Y(72quJ5k#Z)}YhBfwel6 z%21JO4RQ_tdUy|Pj0UdBdJgURUE4MN(6aoHIX!CWjM{of-%_wK1si)B6Zui0zN6=Q)4KoPy-71v~eC&z$XQKQX)(NpSJE@!g(s> z(;5Mo%6WSX;t4~EHwU!3qfk!`Kt1hF*O;(xG+OG!?WYRi^ldZT%Rx9D2^hEe3ZV+m zzlu@)iDxSF+3Ok3VN9cl#MPOh>ZHgQ2SOa|9GyHiOI8$!ga%@p>>-|oEu0KFzj1#m zthxquAf$pBDwyGB*HBGE(;aX@*Mb@9-M((OjBamzm?6B;ZFTE66wL5Zqq9S*v)oAS zl9TT+UsBdeC*RjFJx~KDlYtnqq##kXYego#LNMD{#L}|Ts-hVK34=MX5L%Zrlqho` zp+vrsVt&}`3|!zv$M4>U`HJdsQB+8b@MGic2uUPwVC4uWO|b7w}D6} zM>lNUwG`4xA)T7BYO9b=kLEO^qZ>U1bW%Vkg%W5Oiz%R!0y-(6Q^VLxG1p54I^75& zKNZkvjlfmyygdr(RK?4dz)sVf3h&eaywlGjy9gt3MQ*2YdidzTN58DlQr|Yaz8oM1 zBK-*Ue=C9PlDrCE^BRWrElWgu5x0RgwET+adHa$B#dDt~55S=WG??;? zRSR~3<#qj@Vfle>GcRPm&U(zZOyBE-f#doM9?Dx@yemB^$nL7l3j@9D=oY2a0At?p z7z?@q^Lw@#TA{sA*>nw$yki+J(2##Vj8(vbGSw=3OM}*OJhH6F7}GATCvZ3CGg5wN ziGL32+KD$I$V)p1r+Z&(Cb|!Rh79A(F#CF^uN$pS&t3yGwCar|(9leD?wlR&ydfcv zSU!WI?CuW+XtHO5`eV#)m4ka(;bewm>d}N?&Z&reXX=Lg{Wdu+{fKd&?D#y$$te^^ zq4u?bDhu!nIfUm|;km9G{l0GLu5CD>WqCTdv7V!MZO@@yC$xRfm$Ro~@Qb@q7Ed{q z@4yOtx#(#QP+9QYgksA@wyX`!)-(n(t|=vtlk_rim9usgA8fD9$%<)}0R5^0G*6kY zA%l)W-&fqGL}Y_Sq~cX288;^xDfmwGl8tCPVPz)rNm7ipI8=bliVCxrmEMP00vu@aC?7LbZ*m8^UQ zS?O`%iEEa(v(Ue>goEqcTD>l*m{3X1=1R^*7FMjP(;wkCdvXIjRZ72ZN-7&PbEnkEJ^R@ZpFMxxPD#XI=0%L;xi>An=K?2mnvzv0ci-d z_bxe}ah~G5uN*foL!;Nz+m=F&E2Z3QrL1^WNyg1d#x&;F?bH3i zO+N;1=(^EW9*h!}%@&q3_EmhU1m$x=FKTDTa81RDiW4{KMyF*IwGG=*e5d&CtMc9W zpA*fedHZO8FjUN@%)$nng^E>`Tx>`dQ((}L(L$b)sW?$_;zoRwwkM+EW=Kc3G?@{* z%J!YB(2ioqXLgxyaHp>Eq{u{2O7>LTskrlb)%K0#<89q^ds-oQs^NJX7m7C(Z$7)^ zyfj+6(&~ygH_e+#<}Uyru@XFnMnomHyh1=?1qolW}KEwaRatK*8zS50l{ zR;FW6?=Ylr+NkKyW2&y?JwI~c)!GvxzX4%32wDD zV8FmJPgt;`!P9D1H+KH?cl=^rlxaWnLv z#F)`s=B1WbmCUEnK~P<%y#A3fAO=keta*B;8kE}{@!|N#q$+%~y{n$zNxho|R+6qN^80*(BhQj4RftGPHRB0SNM5F? 
z$Ow+R5Zz71ySDa4+e+a>yx2RBkDzr&?F#xVaR>Xl7iGk({2#$PdVA2 zQVm|tXf~0YwBv(m2~sASkTdFEL=`I~6Y@p`6S9X|Gy~fCWLRXSk2&>$`2>74Za6XH za2;A6(rm;uBqSq8tf1Nsh~r7lbA0-)jA;V515P8DfFdd2-vPYJuE{AVD;ni6J9$bo z_&S@>7Ye=4Ml|97l#JPIGVSgU)*#b6MvIuH+S`%`Fvs2i-+^@!kX^=7ph$9AXB;KR zmDQL_-rC2RH-X8Ey$x&g0;|Re`ZJG$W|JmfZ#wQJfmR3&{vkgBb%b^%v|-AaKPJOT z0$oLJDs!AzZ&|{!s>M6OCChS}A!|*E)@*WQYfUJeL?_@)HtJrn*(M z`)u017SiFLJ~>DVHo{2;REVckLKRud^+JdU|0X;JdSa_*O~Tf3kD>2D!JVrNfL(Ewkh5`_1T5>5GagH;pup2na(ulG3gG)EbRy|77}F$Sugk*Rt63Ty zRaT5eP&peo#Sa3uWbwQYLv4UCZgAA*XLd|Q!r?TwJta`H>Eo?MpIs37Tx!7G;~f&9 zhnBP7N|yLiNJ8!Z@4x=HG4xz3#+fKbTeFx-LOZdE!WC*w&(Ujq@&poe3cMgn8G z)rldO(9_ylsF{Ib(J{Nmay%dhsfY!mpjZm{e@?PG0>TSaGj#v}!%AbGH($%HN2j5P z=!lPKKIfQ2kuWlKy++GyQ_k7Oo3Ty$TG(b#O9JTZK@^psfanUzfzo&^LudHC*U;}b z(7|y`vkUAc-0(#VLX+0PKG|(;E+x_5n5m_NT^HJ@j{Cr~B~jdL>a3@bcUev|G|2^M z8!)r0NPLbLC6vSz+A$>}twLYmcqe(vGa3lOz*32TPkN0f!#c}0C{#b>)Z-B^Cerx0 zf??0cJbe}2n=dfzG9Qd6xgLNlEj{ToP$U645fWuDqvKf)bK^VeK|zp&-#42E)_QSp zYnSX(P>e?+VA2EK&jffI9DB>iDZRoqnS|^%er99(fVIMEF=%qKFGP`ok1cN=3ZF_2 zKS*%-tv0n2xoV_=hG00S3w-n{+>HUKc9zRdPJOMH>|m`m)34Fut2``zjTSH{dfY?P z?B3zY^K10(edYU2#hEWfH5htw75$A65rY|gBBBYJcp$=t6I5Vzz!2YrG0!U74%osc zLsry|XtT+)=HPz-n-1oB4BDE|G!xf6rau4le-Z+;IQsNQAwY5O9_*47s6c+)J3VN8 zx7P_Ml}SKxMx&RPp{-H?zvCkrC!iBhYHwI1t_Zzr?vwBKt_zmn`sh!f4nSZuT<5LH z5Yw;{1_UZB$|(z2h~OzKx(^o!AP8O<0%bkEID?6x?eYp@vKVZ8f#IVy$NR@bTw$NC z#$cWog3$b;53j)lA^5eRjA}0kze?t(jmn@JSb+oz`4_mv0*t{itbia)xPZ>qi{E+5 z{8^Z;;aX))K0(OQWvxUCA@ORHnfO@dlRg=M0Lh@k6+~nk;YzhO+9sLsV3s8;&$mfF z0nZZwIMaeS3uz*fNem`f^H=QFgS!pO?2=8e#G6R?7c%dLT*#G@xb~>}Ab9g)n@G%C zQdFeaFC`@DECa#Yd`4P_8mBP!llkI&AG|FZAXT(5V+^c~JeMJCX^!ku9%28X4t(#` z5I_PTc@(jrPj(Ul25E-p|&F*x#9*Ag%9F}W^L=~&E%gbLc#Oeryw%5 zGZ8TffI)r1lDy1sAim$tcV5E_xbwQ%bii~3>v-_3EOi;mzWGZSuiRn1&0YH|qrp7xxFqz&9wY5*IcO65JS9FOnm! 
zo1C%{ZbiX(`6;XSq>Oobzm!2LuOz^Y2b+*9TfhJE`@ekLqGd4_S+$|c!yRo6*vlEP zQdm6NsqtA*7 zy0lAct9~iNU8O}<%1SPwgcr@Lo!3@DwwCrAPI4aO9PP^wE%Dz$T|4n61bK1i;B;>V zBhEjuQ=!6Ea^vk%zzYS2L7{}+a`6&(X^OCB5ZTx*Q#UP5?`ei{W|)1w)7OobZgu{B zYg$@rm)ZK+18=aI9+HeV9dT(Uf!xgmN$Vvy!qLVd|JXt{(yFbJWH%G3C$9goRf%&N zV2pa9QW7$XVU)Z@4hmU=#3m3lzkna4(X?26_OU>k@=zJr>|++Cs6`njJ76_{41L** zfPt1t$bD6b$7*1i{0PFa^KIglJSvFXoR(ZbjZ%o$A6K=a&-~-g@izH)u>S(FO?HPVF5H6sv-e?l=j=~8d1DU8I9+D#%v*BC1Yl9l zJW(lba5^y1fIJ-G{wFX+ryF=-W}%vjPT6c;)VH&G^5A7P%++tne~$hil5x+k!gF0W z`hA^Q9pCa<&ky~c)9tu4=ozMA25{RnJNDLmOSt5KCP-zq!~Rw7u>TtBQ)Puv`E{C` zUuS{i6{jlc`ux&$4~CyB$*TCZQT(cSP4U|E%f?lf=Rm}1#IDbWh2UTah?o`GLltxw(vdI!sajcS(FCr#% z#$uTvql7Wpox&;4-vWOZfprn22vfI_-wniEB0O|VRxN1!{awHaOL&kb(oAB z!So_#&EB3~pn*?=x*E$~p3d+5&m{Kh(cr>O2#u~Kxr~mosf5pU`l#q^D5$f#i2UE1qW=DT(`@J!}0oq3iKbS&F7^w8T<85NYzzM-Yb zTjE$niBwFpxiL+}uS!BbvxIE(JTF<^wk%ymC{={=1w<$-zFqI6auHykKmBitF%=Li z1qM};QFWpUCRd}0n(<_kY`B+)ZO7w+u&tBEcg+QT!H)G7DHZzcCta6ttZzBIUC zwv686l4H)_xFb2{5l?tbBf=4rJU>~WUJ&~0hl7D!3KGz=XG{BNQAPtAdWLUP+tw}5 z3~ZY@y4~$`oWNB<3T1+C$^^e9)s@euVie7dQ7Cp*@^Pc`F~w653#JLb5NVmu81Dv9 zT5+Qio-aUn-o?v`ZIuvx79px)yeh`~0%E*Vc3ms3iJPHOFemcZ%K4e$|D44k&c+#| zMNZD}j3F5SJ``8lA@B^IYd4fZUsgk`1~iu^4T0U;?8xsUXtam#+=$AAAb9cITfC;# z0bH_lULAV+YI=^*r#^WvZ{RU@c+@x|=8Bpl@Vv*Jv%&kLSJUG&=8qG3geQiYv!YK9 zZpGz>73LVRgsdRPW$dwxRL8bJZPjU@C$t#L;N7dC%6oN7`Sb39P zIVp2_V(29H1gI#ocg_6r!Sf%VVL{{`QoNcak&upQ0Dr>h*gNV=^^4ug|8JL9#& zI9k=w@^Vk{K*+!lG#8<;9xJXGh~0&Q=J)G^o0}aCt7h;zQNq)m{r!Uh+4*qxZtwW) zU~q8uH?niQbNIL6!H~S!8Sd?p_s3-S;O)WL&LP=9IDWf#dUASjd`5P6&UVQAeR8~a z_VfGGAIZt-`?L3h_lM-*=;Uzk2x{9oJ9vNG@aXy4lp2=VC7Z~g@fp1qiHId^dMCJK zS&nF)M%YBaVm0Vj;*m--%W1P(R`y56S z{?KTW^ZY#JX%(kAuPOpDoeC9LRDs1O2rQb`V*`uz8+Tx>7noRzq$v|mMHcF^e{A78 zraz&H@T!$ikWqdl{0m&@^DlA|^B|xTlysN;vQ8m^zl%)#W9!*4+Fgrw&A_2yCt$2Y zySmkNdLHXC+txc>(@=p;<+yLiaeq2BqTGJv_BYqxpLlCo=ef1vqws8@;FD28fiQL89 zA5#{^G%F_U4+-C8{*AobhX;ebu>j=p|e?#3I>5EtuF&$e_O z_|NRp&~i-2bAS`gjuYyJ!$K7oQ(oAHys&kJwDP-@-_=~dYXui8j#ZL#(~?tp^2(Ec 
z0iJxttZR7a`!L_gf;=QM$%9y{6Fd!uSBE>Cu(SYMnH)hk$S?(BWYrPInn!c+7KYV| zM6(e@YtyAFIN0rzT?7hUim04!#Kg_YBRuD!sN5Cp7DdP|G~ksGVmLn=2*#Pq&Brhq za`1-=M({+f5pL0i_rYDH1m+J++KN-a_d`V~kN*G$%9)=rS#q{@nPvAM0rCvNojVD$ z9JKkk6_!y{SNfLv+vJ#oB|uTY)OY7M=JJ*%7a+J%pk(uRB@Gs;+x?Y>G~?T3C*s=XG3W zdX~#<>iVqfhj!TU%#P`FJ2xzucQn#=f#E<{-Hrt<)aY2Y*X;zB=}rSlLUc!i}UZ0dICfXQXhz$lbnwdT3#cq zq1DBw{{_|$`R>pheoy`rfys)9PF{Ta1J7#Wxi%~VqE)|%VUX`WoEXOU+JM0T(1;Do z^wS@}f7J~}j^%)XXsd?tltI(MOJUWR7a);-D;fE2aC|!az8=b$GcWuncW{;q2k@+jHr+)QcFiWrZ4OUF`SeERPs zqZ7FOU&wc(Pk)H`(;q0g@Yq0n{(Xy_WrE*qU@q`HJrK_PW?TDcM*muI+cwFtCNaN* zYH`s$v36U}1_cfrC+u{+PzTG+GJA$^1)kIOEz9)`+cey@mpnFPicWnjb`Vs$o+<0> zt~qyye>SGka&vYLNt=*&-~?u5Co8xgF*KqN@kmv9=3!0g6|Lm~24LcT6zW6{|gx)n+p7 z986-xZEJCxu2}5zq?zKe^?9tNxa@I)ECw#im1(9#*b_1r!W$`H?DoeR@3_0Tn)b1Z zC(TKKu(c1Thg&6eJV3eN+-z4iO=OE^$m!V$r_{GaLc%(a$q?rh#dW#^``PGyTY zROu_{<0^iof-iRmUod_*7z}qSlES5Q7we!`PC<-@%Fy;4 zBCZJbh$RY8?N=gO-Z?S9yWF6f%;!}N;WWVdfQ)$ft%V`|`|#wTH9UBGymR==UIGe} zF`0057Ipr6ItKds`7iAu!V@EjZ$60WXc6=*nPR?GK(WNyEb$DmZ;_7D@=V0y?a2Yr z$SDg6qCQDb!iawQLkm9rk$(EW1=%?{A%@{}Oc0RdyQGZC!S3#{9Q83Y4!>8Bfaj@5 zaA>~2jT)z6$Rl1alSpLE9QXZi^n#JS5sN&QVSv59y>&l;{9$l6ZiQPSrQGweBVTxm zwc53D-In80#%$krdtKWxeO)(g+j0zM2R$R~^cKwff8RTPzx)2{Zo{8?L$7aced(&^ zDR;NaQegU5Il@7q} z-p*;C?D9}{y1<+;d)H}Jonc6Jrnyl;(4V&8k}#U{Xo4umkdMj?!6oGMct^|GZv;$) zi!c&bauRpmD5FO}RX#(bDe1hHbY25M z@>#@h7!u^09{D?501zij&DrH64w8+$myR4ccw(fQp|>2^QxCzY})CuIt$> z=+SQAhh|7+*|6idj@`BA!DThOnmxOq8>5WIz>1^8mE613(whxhZi1AmpSzau$_9U? 
z_$1L+EEs&#)VX77@H{WsvbdbxyffxhFSFhga*`$PFm=?*&4X!IzA%8wdEW0&J)tTJ zEZg*@>*7%F5>Ym}qqp0!%=_Y~OagKjTX)0)dCvT>A}o2wXaJITl_#}C-X(>KeyixWihh5E_==_J zov#@EcKf#8a`mo?ey<#WQ&6}3L`0KB#3(&4^;V~4nB<4_FF#N+&RF=L|FJ3+;ao7b zm#S#}8nvNB3U*k#s?BJ%T6Mku*!l|!Ym#$lUHviRhc^Ao5AF0X&lepw7;`$z>$zRm z@dN5wU<;eR+p%5KG8{)mhn4htSbC}G@arG>hQ_a4y<>H|&I^bRPg~l==&)`Xo$gX} z_}+N%v@!A`H^`6gv{&vU&l^QhyPh85^mNCU%nEENxaDf3CigH=4ZK~rOs@!*+p zqvFAjOT(?$wgK_NdtKE2)YE-jJkX>01x+6j3*3I7J{YTivMbK~3wmFz=C+D3stBWs zFn+F&?r6qCM|*v4gwfK$foOGlUz{)Xl@7oU>0gcoSs)B1j@q#<<$rMX3-Ee_*=96?Kr>{iDZncrcvI0dvd6m zG6{zY?ld;I)9u-AuVcS}VCl4_O$?SAhHE5B`NE(Rjo>6qqh7n`2_ z1eOcS`UsxX9~UYe@(~5^^m0Xr^r5Kek1#ZJn;c|(;qbcp>b}6JeDA@S>ySaiw#gn; zFt5G7TB_YztX;nMaP4CTy`EgS*6r%-<_B))NB8!a}3qIZDLk(Cx;P)(Ax2`NmQp za@vZ!LUD5F?g*5*wAa;b!#B}dH^Po-hutovjvxB25wHdY%GN#k`m=cZF~j@n!$QyJ zjy>-TZ>kSw>0P&HH6x~x-yhVdoBB*ajaxB_pq}7s3%k4eqb5azC&i60!WzDG102ek zI#CYgqC@$pgYTcglVWJ4^?L1-@xjTE{dPWGuLK_o2ld=A9_L5xvR@f+Cfg>b0$V-z zS!UPwK~SczKj^spL?gzpx5*LBxLEkg?D{^>t9tptZw$s1njA%+{@5l%9xr}odU>A* zHh=vEKwy+7^%XrSOVbTZYJDH10}7aDhSS%Hc1g1d=J0$s+`+PVe(NR6WYL1*2;eE;6CfM+ z@trZeU(kZ&tnOxu{NQDOA;$M4h8N{7au~U?O03CX)>Y*d!zPx=?WTWMoDTQDui9c_TYxvu>yF=Qh@{Tq=iy7TcHk#y z2i&;T%{rBsKL!e=Q%_`@Lq%;F217Qn2K|RYLRI9l$(j@GTF|--V`QYIQ?79?;TlmKbd@Jm9Eakr6 zmMJUU_l#L6gln>Pd#>K?x_;LUJ=+iUka~vYGt+Q-x}^t;hku5Al#kI6YLWxLfa_S! 
zz3pdArUL{P1)ZGVggAJUw%yX(5e7q%msK#J5q?icckOoEYfX8rWg4z-Ij+^ZG@2R} zxVOzs4G%mhOdsr+nlCS5zN`!wEK2fqLk3cs-ijH_?>_k|G+MFJ6T7296{KgmLG!yC zZ_tKU=_@6#f+{a5s4@Uc<9yjRe<+UfO+yQ2#}rfah)!9~@GvHF0QM&@N(?7`NWjL+ z^4cl=YstXC16>%%xv>zmIpWX)N)AEos$&4Q)HWrxP3i0GDljzz|NBbYl+0e==(Vhl ztF|ev9DsX)5zn$eiJBJmZMPeCb?OF7LCM57>QtDseNz;hbLR3>(AUF)_05?oJG!oU6(Fxr0<)bkz z$Q8|DqCyd1%z#|+VoXL%ROeXZ0oLgZ?In+I082O6Cag7T{o@~4m*!E#vgM5DG56bV zPfkf8Vl;}Oift&4x~Fj9ODLSIRORjk^1FtKn1TuB^t|MQK!UlFKqJ2fzafhycx0%=+9eXC2b>-ubts9!kl~syGDc$RkAXZDq2--9FG(^3 zuU$ch3MT6r{Q1vSYsNr6*|#eg`WY(?CeV@!JeX!E36Q7CoK@l6uN=7b!ydTb9h+Hh z*RpM+W9nAdFnez3xjke3!0q!WjSZvjj7OpFe8=qSUDlx;uVeWR1swn$0qdB)@A`~& zw{9*OllPz{l19ZV@4?;Ol~rhzM%wf>g|=%#SOx>+R@)aX-O+WUqc<32x2mc4#5q|q zT59TEGQz;nR|s+HHxA$YScy|$S0HmDXH^M4(4$%X)=_KKcelR$Y_+E_{p%3Y3*@|GLyEw9{Sb2DtS?k1ZmymV^`dg1odE<)GA_NMG=G1RV>M;zRK$AE#5 zx)Ry)h|TXVw`>FFk?+y07?UBcU;6UNOKSX}T3cE=O1(Kt@n^FYs>OAi<;5yov#1sm zNjP3}7;l*oKg#|ZT1w?euS{y*Ik#;OI!!F(~F9PB=g z?#IBl27gS=wz|63>vd}twbn^l-}IQGnhlCE%Fn~lEuVJ(+6rcy_nf#*?LFo zn1;PDn{KA#s>WE&sjoYyj?q9EmWfhHFK`msde7Eu+cg&^k)>492B*&uoP6?gi|n?@ z8Bl%Ew3|OtGwOxRC<`VN{OY;7zA&Tg#TiwPvYJ<4cV7J)kICPJP-^MrO(Iisb<@#I z$L%gmB1fsEO9p>xEg=*;dJERC1a}@7nl=r~1tm zrtVivPUH5SZmZ)uDml$vXJ}vQEu*F5iP_|bGJ?M%{uhv;FmDTj@`GGf37r4Q0`dcH znD|Rl#_T%HerU@pnRA#$k>*V_{C8f+qtha|U1EfZ2}zzZsHfg(f*VHiAKLIi@o9$oTc*}UQ;Xj}Gy4Pb) z;Cm`1&P$n3DkaX>R--Bt>9(V8> z)E5AKFdYK?BZsMeGQXkF<&6yjTbikN4Xx`KcY@C95ni#PvW%Ug3Df>2#Kkrqj|W`F zwieIFt8XY~Xe=|Bn$zocwXWsdiJ6)YvP$Qq(m6dUos*$Il+MYN`$Y_lb=&KubGq}5 z8`1F%gj|`;Lf7@(uEW4JwoJd{cEiwf4a+bbeeICzw~)+ckq_>SGS1uJIJ)h4fk``# zZuG*yWu2~V(5~ZHJ;!$!kC6M2TqNQOMdr+CR1wGL7Yhvvi`cGZcG^4;ek*49w+{_% zM4a(^r*iz8K57a%1hKzW1byxC1e+^3D%os`~{r>e0Eo=AR^lld@|c8L-35e#wKK_CUY(V2$FdOX`Ju}^*X&w0 z^#jxDu)yejQXXu0|28Kgs1=j literal 0 HcmV?d00001 From 9f73d93e624ae08ca61288503171226477fd87fd Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 3 Oct 2023 14:39:08 +0300 Subject: [PATCH 26/57] Add param for limiting repo Urls --- 
.../swh/CollectLastVisitRepositoryData.java | 34 ++++++++----------- .../swh/CollectSoftwareRepositoryURLs.java | 16 ++++++--- .../dnetlib/dhp/swh/PrepareSWHActionsets.java | 1 - ...nput_collect_software_repository_urls.json | 6 ++++ .../eu/dnetlib/dhp/swh/job.properties | 2 ++ .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 1 + 6 files changed, 35 insertions(+), 25 deletions(-) diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java index 296a4cce1..e602266a8 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java @@ -1,32 +1,26 @@ package eu.dnetlib.dhp.swh; -import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.charset.StandardCharsets; - -import org.apache.commons.cli.ParseException; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.swh.utils.SWHConnection; import eu.dnetlib.dhp.swh.utils.SWHConstants; import eu.dnetlib.dhp.swh.utils.SWHUtils; +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; 
+import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.net.URL; + +import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; /** * Given a file with software repository URLs, this class diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java index 6232fa322..abd51bc5b 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectSoftwareRepositoryURLs.java @@ -51,6 +51,9 @@ public class CollectSoftwareRepositoryURLs { final String hiveMetastoreUris = parser.get("hiveMetastoreUris"); log.info("hiveMetastoreUris: {}", hiveMetastoreUris); + final Integer softwareLimit = Integer.parseInt(parser.get("softwareLimit")); + log.info("softwareLimit: {}", softwareLimit); + SparkConf conf = new SparkConf(); conf.set("hive.metastore.uris", hiveMetastoreUris); @@ -58,18 +61,23 @@ public class CollectSoftwareRepositoryURLs { conf, isSparkSessionManaged, spark -> { - doRun(spark, hiveDbName, outputPath); + doRun(spark, hiveDbName, softwareLimit, outputPath); }); } - private static void doRun(SparkSession spark, String hiveDbName, String outputPath) { + private static void doRun(SparkSession spark, String hiveDbName, Integer limit, + String outputPath) { String queryTemplate = "SELECT distinct coderepositoryurl.value " + "FROM %s.software " + "WHERE coderepositoryurl.value IS NOT NULL " + "AND datainfo.deletedbyinference = FALSE " + - "AND datainfo.invisible = FALSE " + - "LIMIT 5000"; + "AND datainfo.invisible = FALSE "; + + if (limit != null) { + queryTemplate += String.format("LIMIT %s", limit); + } + String query = String.format(queryTemplate, hiveDbName); 
log.info("Hive query to fetch software code URLs: {}", query); diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java index c0ab11bc4..c54e10d3e 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java @@ -82,7 +82,6 @@ public class PrepareSWHActionsets { softwareRDD .saveAsHadoopFile( outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); -// , GzipCodec.class); }); } diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json index 6e98c7673..4459fe9df 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json @@ -22,5 +22,11 @@ "paramLongName": "hiveMetastoreUris", "paramDescription": "the hive metastore uris", "paramRequired": true + }, + { + "paramName": "slim", + "paramLongName": "softwareLimit", + "paramDescription": "limit on the number of software repo URL to fetch", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties index 651bae337..8dd0689a3 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties @@ -14,4 +14,6 @@ maxNumberOfRetry=2 retryDelay=1 requestDelay=100 +softwareLimit=500 + resume=collect-software-repository-urls diff --git 
a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index 7aa667a4a..e0763414f 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -83,6 +83,7 @@ --softwareCodeRepositoryURLs${softwareCodeRepositoryURLs} --hiveDbName${hiveDbName} --hiveMetastoreUris${hiveMetastoreUris} + --softwareLimit${softwareLimit} From 24c43e0c602254f506e81f1e3d361645b1a9d36e Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 3 Oct 2023 15:11:58 +0300 Subject: [PATCH 27/57] Restructure workflow parameters --- .../main/resources/eu/dnetlib/dhp/swh/job.properties | 2 -- .../eu/dnetlib/dhp/swh/oozie_app/config-default.xml | 4 ++++ .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties index 8dd0689a3..114181944 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties @@ -1,8 +1,6 @@ # hive hiveDbName=openaire_prod_20230914 -sparkSqlWarehouseDir=/user/hive/warehouse - # input/output files softwareCodeRepositoryURLs=${workingDir}/1_code_repo_urls.csv lastVisitsPath=${workingDir}/2_last_visits.seq diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/config-default.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/config-default.xml index 7873d595e..3e45a53fa 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/config-default.xml 
@@ -47,4 +47,8 @@ oozie.launcher.mapreduce.user.classpath.first true + + sparkSqlWarehouseDir + /user/hive/warehouse + \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index e0763414f..e29e5b43d 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -18,6 +18,14 @@ archiveRequestsPath The path in the HDFS to save the responses of the archive requests + + actionsetsPath + The path in the HDFS to save the action sets + + + graphPath + The path in the HDFS to the base folder of the graph + maxNumberOfRetry Max number of retries for failed API calls @@ -30,6 +38,10 @@ requestDelay Delay between API requests (in ms) + + softwareLimit + Limit on the number of repo URLs to use (Optional); for debug purposes + resume Variable that indicates the step to start from From b49a3ac9b21233edb889b2e0e60dfa3ed8c02734 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 3 Oct 2023 15:43:38 +0300 Subject: [PATCH 28/57] Add actionsetsPath as a global WF param --- .../swh/CollectLastVisitRepositoryData.java | 23 ++++++++++--------- .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 4 ++++ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java index e602266a8..ce1b0bb46 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.swh; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import 
eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.common.collection.HttpClientParams; -import eu.dnetlib.dhp.swh.utils.SWHConnection; -import eu.dnetlib.dhp.swh.utils.SWHConstants; -import eu.dnetlib.dhp.swh.utils.SWHUtils; +import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; + +import java.io.BufferedReader; +import java.io.IOException; +import java.net.URL; + import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.FileStatus; @@ -16,11 +16,12 @@ import org.apache.hadoop.io.SequenceFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; -import java.io.IOException; -import java.net.URL; - -import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.swh.utils.SWHConnection; +import eu.dnetlib.dhp.swh.utils.SWHConstants; +import eu.dnetlib.dhp.swh.utils.SWHUtils; /** * Given a file with software repository URLs, this class diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index e29e5b43d..c69ecd74d 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -57,6 +57,10 @@ oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} + + actionsetsPath + ${actionsetsPath} + From cae75fc75da4410289e1d5107073c5b112520003 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 3 Oct 2023 16:55:10 +0300 Subject: [PATCH 29/57] Add SWH in the collectedFrom field --- .../java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java | 8 ++++++++ 
.../main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java index c54e10d3e..93d111039 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java @@ -165,6 +165,14 @@ public class PrepareSWHActionsets { row.getString(row.fieldIndex("swhid")), qualifier, dataInfo))); + + // add SWH in the `collectedFrom` field + KeyValue kv = new KeyValue(); + kv.setKey(SWHConstants.SWH_ID); + kv.setValue(SWHConstants.SWH_NAME); + + s.setCollectedfrom(Arrays.asList(kv)); + return s; }, Encoders.bean(Software.class)) .toJavaRDD() diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java index 08400f28b..efd0e708b 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java @@ -16,4 +16,8 @@ public class SWHConstants { public static final String SWHID_CLASSNAME = "Software Heritage Identifier"; + public static final String SWH_ID = "10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4"; + + public static final String SWH_NAME = "Software Heritage"; + } From e9f24df21cce089aec7945b2f50f20cbe1acc062 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 3 Oct 2023 20:57:57 +0300 Subject: [PATCH 30/57] Move SWH API Key from constants to workflow param --- .../java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java | 5 ++++- .../dhp/swh/CollectLastVisitRepositoryData.java | 5 ++++- .../java/eu/dnetlib/dhp/swh/utils/SWHConnection.java | 6 ++++-- .../java/eu/dnetlib/dhp/swh/utils/SWHConstants.java | 2 -- 
.../dnetlib/dhp/swh/input_archive_repository_urls.json | 6 ++++++ .../swh/input_collect_last_visit_repository_data.json | 6 ++++++ .../main/resources/eu/dnetlib/dhp/swh/job.properties | 2 ++ .../eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 10 ++++++++++ .../java/eu/dnetlib/dhp/swh/SWHConnectionTest.java | 4 ++-- 9 files changed, 38 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java index f02861953..baa510346 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.java @@ -63,9 +63,12 @@ public class ArchiveRepositoryURLs { final Integer archiveThresholdInDays = Integer.parseInt(argumentParser.get("archiveThresholdInDays")); log.info("archiveThresholdInDays: {}", archiveThresholdInDays); + final String apiAccessToken = argumentParser.get("apiAccessToken"); + log.info("apiAccessToken: {}", apiAccessToken); + final HttpClientParams clientParams = SWHUtils.getClientParams(argumentParser); - swhConnection = new SWHConnection(clientParams); + swhConnection = new SWHConnection(clientParams, apiAccessToken); final FileSystem fs = FileSystem.get(getHadoopConfiguration(hdfsuri)); diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java index ce1b0bb46..ebb9176ff 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/CollectLastVisitRepositoryData.java @@ -55,9 +55,12 @@ public class CollectLastVisitRepositoryData { final String outputPath = argumentParser.get("lastVisitsPath"); log.info("outputPath: {}", outputPath); + final String apiAccessToken = 
argumentParser.get("apiAccessToken"); + log.info("apiAccessToken: {}", apiAccessToken); + final HttpClientParams clientParams = SWHUtils.getClientParams(argumentParser); - swhConnection = new SWHConnection(clientParams); + swhConnection = new SWHConnection(clientParams, apiAccessToken); final FileSystem fs = FileSystem.get(getHadoopConfiguration(hdfsuri)); diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java index 9c145fc19..80249e816 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConnection.java @@ -14,13 +14,15 @@ public class SWHConnection { HttpConnector2 conn; - public SWHConnection(HttpClientParams clientParams) { + public SWHConnection(HttpClientParams clientParams, String accessToken) { // set custom headers Map headers = new HashMap() { { put(HttpHeaders.ACCEPT, "application/json"); - put(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", SWHConstants.ACCESS_TOKEN)); + if (accessToken != null) { + put(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", accessToken)); + } } }; diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java index efd0e708b..eae839cfd 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java @@ -6,8 +6,6 @@ public class SWHConstants { public static final String SWH_ARCHIVE_URL = "https://archive.softwareheritage.org/api/1/origin/save/%s/url/%s/"; - public static final String ACCESS_TOKEN = 
"eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJhMTMxYTQ1My1hM2IyLTQwMTUtODQ2Ny05MzAyZjk3MTFkOGEifQ.eyJpYXQiOjE2OTQ2MzYwMjAsImp0aSI6IjkwZjdkNTNjLTQ5YTktNGFiMy1hY2E0LTcwMTViMjEyZTNjNiIsImlzcyI6Imh0dHBzOi8vYXV0aC5zb2Z0d2FyZWhlcml0YWdlLm9yZy9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwiYXVkIjoiaHR0cHM6Ly9hdXRoLnNvZnR3YXJlaGVyaXRhZ2Uub3JnL2F1dGgvcmVhbG1zL1NvZnR3YXJlSGVyaXRhZ2UiLCJzdWIiOiIzMTY5OWZkNC0xNmE0LTQxOWItYTdhMi00NjI5MDY4ZjI3OWEiLCJ0eXAiOiJPZmZsaW5lIiwiYXpwIjoic3doLXdlYiIsInNlc3Npb25fc3RhdGUiOiIzMjYzMzEwMS00ZDRkLTQwMjItODU2NC1iMzNlMTJiNTE3ZDkiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyBwcm9maWxlIGVtYWlsIn0.XHj1VIZu1dZ4Ej32-oU84mFmaox9cLNjXosNxwZM0Xs"; - public static final String DEFAULT_VISIT_TYPE = "git"; public static final String VISIT_STATUS_NOT_FOUND = "not_found"; diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json index ce80d6f4a..e8671f71b 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_archive_repository_urls.json @@ -46,5 +46,11 @@ "paramLongName": "archiveThresholdInDays", "paramDescription": "the thershold (in days) required to issue an archive request", "paramRequired": false + }, + { + "paramName": "aat", + "paramLongName": "apiAccessToken", + "paramDescription": "the API access token of the SWH API", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json index 8bf41f0ae..662582dfe 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json +++ 
b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/input_collect_last_visit_repository_data.json @@ -40,5 +40,11 @@ "paramLongName": "requestMethod", "paramDescription": "the method of the requests to perform", "paramRequired": false + }, + { + "paramName": "aat", + "paramLongName": "apiAccessToken", + "paramDescription": "the API access token of the SWH API", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties index 114181944..35c068286 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/job.properties @@ -8,6 +8,8 @@ archiveRequestsPath=${workingDir}/3_archive_requests.seq actionsetsPath=${workingDir}/4_actionsets graphPath=/tmp/prod_provision/graph/18_graph_blacklisted +apiAccessToken=eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJhMTMxYTQ1My1hM2IyLTQwMTUtODQ2Ny05MzAyZjk3MTFkOGEifQ.eyJpYXQiOjE2OTQ2MzYwMjAsImp0aSI6IjkwZjdkNTNjLTQ5YTktNGFiMy1hY2E0LTcwMTViMjEyZTNjNiIsImlzcyI6Imh0dHBzOi8vYXV0aC5zb2Z0d2FyZWhlcml0YWdlLm9yZy9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwiYXVkIjoiaHR0cHM6Ly9hdXRoLnNvZnR3YXJlaGVyaXRhZ2Uub3JnL2F1dGgvcmVhbG1zL1NvZnR3YXJlSGVyaXRhZ2UiLCJzdWIiOiIzMTY5OWZkNC0xNmE0LTQxOWItYTdhMi00NjI5MDY4ZjI3OWEiLCJ0eXAiOiJPZmZsaW5lIiwiYXpwIjoic3doLXdlYiIsInNlc3Npb25fc3RhdGUiOiIzMjYzMzEwMS00ZDRkLTQwMjItODU2NC1iMzNlMTJiNTE3ZDkiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyBwcm9maWxlIGVtYWlsIn0.XHj1VIZu1dZ4Ej32-oU84mFmaox9cLNjXosNxwZM0Xs + maxNumberOfRetry=2 retryDelay=1 requestDelay=100 diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index c69ecd74d..64dc0d2aa 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -38,6 +38,10 @@ requestDelay Delay between API requests (in ms) + + apiAccessToken + The API Key of the SWH API + softwareLimit Limit on the number of repo URLs to use (Optional); for debug purposes @@ -61,6 +65,10 @@ actionsetsPath ${actionsetsPath} + + apiAccessToken + ${apiAccessToken} + @@ -117,6 +125,7 @@ --requestDelay${requestDelay} --retryDelay${retryDelay} --requestMethodGET + --apiAccessToken${apiAccessToken} @@ -136,6 +145,7 @@ --requestDelay${requestDelay} --retryDelay${retryDelay} --requestMethodPOST + --apiAccessToken${apiAccessToken} diff --git a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java index 28210f1b3..b19e0e7ac 100644 --- a/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java +++ b/dhp-workflows/dhp-swh/src/test/java/eu/dnetlib/dhp/swh/SWHConnectionTest.java @@ -25,7 +25,7 @@ public class SWHConnectionTest { HttpClientParams clientParams = new HttpClientParams(); clientParams.setRequestMethod("GET"); - SWHConnection swhConnection = new SWHConnection(clientParams); + SWHConnection swhConnection = new SWHConnection(clientParams, null); String repoUrl = "https://github.com/stanford-futuredata/FAST"; URL url = new URL(String.format(SWHConstants.SWH_LATEST_VISIT_URL, repoUrl)); @@ -43,7 +43,7 @@ public class SWHConnectionTest { HttpClientParams clientParams = new HttpClientParams(); clientParams.setRequestMethod("POST"); - SWHConnection swhConnection = new SWHConnection(clientParams); + SWHConnection swhConnection = new SWHConnection(clientParams, null); String repoUrl = "https://github.com/stanford-futuredata/FAST"; URL url = new URL(String.format(SWHConstants.SWH_ARCHIVE_URL, SWHConstants.DEFAULT_VISIT_TYPE, repoUrl)); From ee8a39e7d2dcd121d88c1bb84eb4794024fe6a16 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 4 Oct 
2023 12:32:05 +0200 Subject: [PATCH 31/57] cleanup and refinements --- .../CreateActionSetSparkJob.java | 63 ++++++++++++------- .../opencitations/GetOpenCitationsRefs.java | 3 +- .../actionmanager/opencitations/ReadCOCI.java | 9 +-- .../opencitations/as_parameters.json | 18 +++--- .../opencitations/oozie_app/workflow.xml | 3 +- 5 files changed, 55 insertions(+), 41 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index e3a9833b3..2db756a94 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -32,18 +32,28 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; public class CreateActionSetSparkJob implements Serializable { public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations"; public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations"; + + // DOI-to-DOI citations + public static final String COCI = "COCI"; + + // PMID-to-PMID citations + public static final String POCI = "POCI"; + private static final String DOI_PREFIX = "50|doi_________::"; private static final String PMID_PREFIX = "50|pmid________::"; + private static final String TRUST = "0.91"; private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] 
args) throws IOException, ParseException { @@ -67,7 +77,7 @@ public class CreateActionSetSparkJob implements Serializable { log.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String inputPath = parser.get("inputPath"); - log.info("inputPath {}", inputPath.toString()); + log.info("inputPath {}", inputPath); final String outputPath = parser.get("outputPath"); log.info("outputPath {}", outputPath); @@ -81,19 +91,16 @@ public class CreateActionSetSparkJob implements Serializable { runWithSparkSession( conf, isSparkSessionManaged, - spark -> { - extractContent(spark, inputPath, outputPath, shouldDuplicateRels); - }); + spark -> extractContent(spark, inputPath, outputPath, shouldDuplicateRels)); } private static void extractContent(SparkSession spark, String inputPath, String outputPath, boolean shouldDuplicateRels) { - getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, "COCI") - .union(getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, "POCI")) + getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, COCI) + .union(getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, POCI)) .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); - } private static JavaPairRDD getTextTextJavaPairRDD(SparkSession spark, String inputPath, @@ -109,7 +116,7 @@ public class CreateActionSetSparkJob implements Serializable { value, shouldDuplicateRels, prefix) .iterator(), Encoders.bean(Relation.class)) - .filter((FilterFunction) value -> value != null) + .filter((FilterFunction) Objects::nonNull) .toJavaRDD() .map(p -> new AtomicAction(p.getClass(), p)) .mapToPair( @@ -123,20 +130,28 @@ public class CreateActionSetSparkJob implements Serializable { String prefix; String citing; String cited; - if (p.equals("COCI")) { - prefix = DOI_PREFIX; - citing = prefix - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting())); - cited = prefix - + 
IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); - - } else { - prefix = PMID_PREFIX; - citing = prefix - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("pmid", value.getCiting())); - cited = prefix - + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("pmid", value.getCited())); + switch (p) { + case COCI: + prefix = DOI_PREFIX; + citing = prefix + + IdentifierFactory + .md5(CleaningFunctions.normalizePidValue(PidType.doi.toString(), value.getCiting())); + cited = prefix + + IdentifierFactory + .md5(CleaningFunctions.normalizePidValue(PidType.doi.toString(), value.getCited())); + break; + case POCI: + prefix = PMID_PREFIX; + citing = prefix + + IdentifierFactory + .md5(CleaningFunctions.normalizePidValue(PidType.pmid.toString(), value.getCiting())); + cited = prefix + + IdentifierFactory + .md5(CleaningFunctions.normalizePidValue(PidType.pmid.toString(), value.getCited())); + break; + default: + throw new IllegalStateException("Invalid prefix: " + p); } if (!citing.equals(cited)) { @@ -162,7 +177,7 @@ public class CreateActionSetSparkJob implements Serializable { public static Relation getRelation( String source, String target, - String relclass) { + String relClass) { return OafMapperUtils .getRelation( @@ -170,7 +185,7 @@ public class CreateActionSetSparkJob implements Serializable { target, ModelConstants.RESULT_RESULT, ModelConstants.CITATION, - relclass, + relClass, Arrays .asList( OafMapperUtils.keyValue(ModelConstants.OPENOCITATIONS_ID, ModelConstants.OPENOCITATIONS_NAME)), @@ -183,6 +198,6 @@ public class CreateActionSetSparkJob implements Serializable { ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), TRUST), null); - } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java index 
60dc998ef..600cf7df1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.actionmanager.opencitations; import java.io.*; import java.io.Serializable; +import java.util.Arrays; import java.util.Objects; import java.util.zip.GZIPOutputStream; import java.util.zip.ZipEntry; @@ -37,7 +38,7 @@ public class GetOpenCitationsRefs implements Serializable { parser.parseArgument(args); final String[] inputFile = parser.get("inputFile").split(";"); - log.info("inputFile {}", inputFile.toString()); + log.info("inputFile {}", Arrays.asList(inputFile)); final String workingPath = parser.get("workingPath"); log.info("workingPath {}", workingPath); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java index 3d384de9d..b9c24df3b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCI.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; import java.io.Serializable; +import java.util.Arrays; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -42,7 +43,7 @@ public class ReadCOCI implements Serializable { log.info("outputPath: {}", outputPath); final String[] inputFile = parser.get("inputFile").split(";"); - log.info("inputFile {}", inputFile.toString()); + log.info("inputFile {}", Arrays.asList(inputFile)); Boolean isSparkSessionManaged = isSparkSessionManaged(parser); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); @@ -74,10 +75,10 @@ 
public class ReadCOCI implements Serializable { private static void doRead(SparkSession spark, String workingPath, String[] inputFiles, String outputPath, - String delimiter, String format) throws IOException { + String delimiter, String format) { for (String inputFile : inputFiles) { - String p_string = workingPath + "/" + inputFile + ".gz"; + String pString = workingPath + "/" + inputFile + ".gz"; Dataset cociData = spark .read() @@ -86,7 +87,7 @@ public class ReadCOCI implements Serializable { .option("inferSchema", "true") .option("header", "true") .option("quotes", "\"") - .load(p_string) + .load(pString) .repartition(100); cociData.map((MapFunction) row -> { diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json index e25d1f4b8..5244a6fe4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json @@ -16,15 +16,11 @@ "paramLongName": "isSparkSessionManaged", "paramDescription": "the hdfs name node", "paramRequired": false - }, { - "paramName": "sdr", - "paramLongName": "shouldDuplicateRels", - "paramDescription": "the hdfs name node", - "paramRequired": false -},{ - "paramName": "p", - "paramLongName": "prefix", - "paramDescription": "the hdfs name node", - "paramRequired": true -} + }, + { + "paramName": "sdr", + "paramLongName": "shouldDuplicateRels", + "paramDescription": "activates/deactivates the construction of bidirectional relations Cites/IsCitedBy", + "paramRequired": false + } ] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index d87dfa2ba..deb32459b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -34,6 +34,7 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + ${jobTracker} @@ -54,6 +55,7 @@ + eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs @@ -112,7 +114,6 @@ --inputPath${workingPath} --outputPath${outputPath} - --prefix${prefix} From 1bb83b91880933cee1a39aaa59ad0c70f5086156 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Wed, 4 Oct 2023 20:31:45 +0300 Subject: [PATCH 32/57] Add prefix in SWH ID --- .../src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java index 93d111039..2239244d6 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java @@ -162,7 +162,7 @@ public class PrepareSWHActionsets { .asList( OafMapperUtils .structuredProperty( - row.getString(row.fieldIndex("swhid")), + String.format("swh:1:snp:%s", row.getString(row.fieldIndex("swhid"))), qualifier, dataInfo))); From 13f332ce77e3ef7a03ba6e0b31419cf7caf3ea8e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 6 Oct 2023 10:40:05 +0200 Subject: [PATCH 33/57] ignored jenv prop --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 73d9179fa..14cd4d345 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ spark-warehouse /**/*.log /**/.factorypath 
/**/.scalafmt.conf +/.java-version From 42a2dad975051211dae5bf56e6d129d82d1401f9 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 6 Oct 2023 11:52:33 +0200 Subject: [PATCH 34/57] implemented relation to irish funder from a Json list --- .../dhp/doiboost/crossref/irish_funder.json | 946 ++++++++++++++++++ .../doiboost/crossref/Crossref2Oaf.scala | 25 + .../crossref/CrossrefMappingTest.scala | 2 +- 3 files changed, 972 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json new file mode 100644 index 000000000..6c6912c2e --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json @@ -0,0 +1,946 @@ +[ + { + "id": "100007630", + "uri": "http://dx.doi.org/10.13039/100007630", + "name": "College of Engineering and Informatics, National University of Ireland, Galway", + "synonym": [] + }, + { + "id": "100007731", + "uri": "http://dx.doi.org/10.13039/100007731", + "name": "Endo International", + "synonym": [] + }, + { + "id": "100008099", + "uri": "http://dx.doi.org/10.13039/100008099", + "name": "Food Safety Authority of Ireland", + "synonym": [] + }, + { + "id": "100008124", + "uri": "http://dx.doi.org/10.13039/100008124", + "name": "Department of Jobs, Enterprise and Innovation", + "synonym": [] + }, + { + "id": "100009098", + "uri": "http://dx.doi.org/10.13039/100009098", + "name": "Department of Foreign Affairs and Trade, Ireland", + "synonym": [] + }, + { + "id": "100009099", + "uri": "http://dx.doi.org/10.13039/100009099", + "name": "Irish Aid", + "synonym": [] + }, + { + "id": "100009770", + "uri": "http://dx.doi.org/10.13039/100009770", + "name": "National University of Ireland", + "synonym": [] + }, + { + 
"id": "100009985", + "uri": "http://dx.doi.org/10.13039/100009985", + "name": "Parkinson's Association of Ireland", + "synonym": [] + }, + { + "id": "100010399", + "uri": "http://dx.doi.org/10.13039/100010399", + "name": "European Society of Cataract and Refractive Surgeons", + "synonym": [] + }, + { + "id": "100010414", + "uri": "http://dx.doi.org/10.13039/100010414", + "name": "Health Research Board", + "synonym": [ + "501100001590" + ] + }, + { + "id": "100010546", + "uri": "http://dx.doi.org/10.13039/100010546", + "name": "Deparment of Children and Youth Affairs, Ireland", + "synonym": [] + }, + { + "id": "100010993", + "uri": "http://dx.doi.org/10.13039/100010993", + "name": "Irish Nephrology Society", + "synonym": [] + }, + { + "id": "100011062", + "uri": "http://dx.doi.org/10.13039/100011062", + "name": "Asian Spinal Cord Network", + "synonym": [] + }, + { + "id": "100011096", + "uri": "http://dx.doi.org/10.13039/100011096", + "name": "Jazz Pharmaceuticals", + "synonym": [] + }, + { + "id": "100011396", + "uri": "http://dx.doi.org/10.13039/100011396", + "name": "Irish College of General Practitioners", + "synonym": [] + }, + { + "id": "100012734", + "uri": "http://dx.doi.org/10.13039/100012734", + "name": "Department for Culture, Heritage and the Gaeltacht, Ireland", + "synonym": [] + }, + { + "id": "100012754", + "uri": "http://dx.doi.org/10.13039/100012754", + "name": "Horizon Pharma", + "synonym": [] + }, + { + "id": "100012891", + "uri": "http://dx.doi.org/10.13039/100012891", + "name": "Medical Research Charities Group", + "synonym": [] + }, + { + "id": "100012919", + "uri": "http://dx.doi.org/10.13039/100012919", + "name": "Epilepsy Ireland", + "synonym": [] + }, + { + "id": "100012920", + "uri": "http://dx.doi.org/10.13039/100012920", + "name": "GLEN", + "synonym": [] + }, + { + "id": "100012921", + "uri": "http://dx.doi.org/10.13039/100012921", + "name": "Royal College of Surgeons in Ireland", + "synonym": [] + }, + { + "id": "100013029", + "uri": 
"http://dx.doi.org/10.13039/100013029", + "name": "Iris O'Brien Foundation", + "synonym": [] + }, + { + "id": "100013206", + "uri": "http://dx.doi.org/10.13039/100013206", + "name": "Food Institutional Research Measure", + "synonym": [] + }, + { + "id": "100013381", + "uri": "http://dx.doi.org/10.13039/100013381", + "name": "Irish Phytochemical Food Network", + "synonym": [] + }, + { + "id": "100013433", + "uri": "http://dx.doi.org/10.13039/100013433", + "name": "Transport Infrastructure Ireland", + "synonym": [] + }, + { + "id": "100013461", + "uri": "http://dx.doi.org/10.13039/100013461", + "name": "Arts and Disability Ireland", + "synonym": [] + }, + { + "id": "100013548", + "uri": "http://dx.doi.org/10.13039/100013548", + "name": "Filmbase", + "synonym": [] + }, + { + "id": "100013917", + "uri": "http://dx.doi.org/10.13039/100013917", + "name": "Society for Musicology in Ireland", + "synonym": [] + }, + { + "id": "100014251", + "uri": "http://dx.doi.org/10.13039/100014251", + "name": "Humanities in the European Research Area", + "synonym": [] + }, + { + "id": "100014364", + "uri": "http://dx.doi.org/10.13039/100014364", + "name": "National Children's Research Centre", + "synonym": [] + }, + { + "id": "100014384", + "uri": "http://dx.doi.org/10.13039/100014384", + "name": "Amarin Corporation", + "synonym": [] + }, + { + "id": "100014902", + "uri": "http://dx.doi.org/10.13039/100014902", + "name": "Irish Association for Cancer Research", + "synonym": [] + }, + { + "id": "100015023", + "uri": "http://dx.doi.org/10.13039/100015023", + "name": "Ireland Funds", + "synonym": [] + }, + { + "id": "100015037", + "uri": "http://dx.doi.org/10.13039/100015037", + "name": "Simon Cumbers Media Fund", + "synonym": [] + }, + { + "id": "100015319", + "uri": "http://dx.doi.org/10.13039/100015319", + "name": "Sport Ireland Institute", + "synonym": [] + }, + { + "id": "100015320", + "uri": "http://dx.doi.org/10.13039/100015320", + "name": "Paralympics Ireland", + "synonym": [] + }, 
+ { + "id": "100015442", + "uri": "http://dx.doi.org/10.13039/100015442", + "name": "Global Brain Health Institute", + "synonym": [] + }, + { + "id": "100015776", + "uri": "http://dx.doi.org/10.13039/100015776", + "name": "Health and Social Care Board", + "synonym": [] + }, + { + "id": "100015992", + "uri": "http://dx.doi.org/10.13039/100015992", + "name": "St. Luke's Institute of Cancer Research", + "synonym": [] + }, + { + "id": "100017897", + "uri": "http://dx.doi.org/10.13039/100017897", + "name": "Friedreich\u2019s Ataxia Research Alliance Ireland", + "synonym": [] + }, + { + "id": "100018064", + "uri": "http://dx.doi.org/10.13039/100018064", + "name": "Department of Tourism, Culture, Arts, Gaeltacht, Sport and Media", + "synonym": [] + }, + { + "id": "100018172", + "uri": "http://dx.doi.org/10.13039/100018172", + "name": "Department of the Environment, Climate and Communications", + "synonym": [] + }, + { + "id": "100018175", + "uri": "http://dx.doi.org/10.13039/100018175", + "name": "Dairy Processing Technology Centre", + "synonym": [] + }, + { + "id": "100018270", + "uri": "http://dx.doi.org/10.13039/100018270", + "name": "Health Service Executive", + "synonym": [] + }, + { + "id": "100018529", + "uri": "http://dx.doi.org/10.13039/100018529", + "name": "Alkermes", + "synonym": [] + }, + { + "id": "100018542", + "uri": "http://dx.doi.org/10.13039/100018542", + "name": "Irish Endocrine Society", + "synonym": [] + }, + { + "id": "100018754", + "uri": "http://dx.doi.org/10.13039/100018754", + "name": "An Roinn Sl\u00e1inte", + "synonym": [] + }, + { + "id": "100018998", + "uri": "http://dx.doi.org/10.13039/100018998", + "name": "Irish Research eLibrary", + "synonym": [] + }, + { + "id": "100019428", + "uri": "http://dx.doi.org/10.13039/100019428", + "name": "Nabriva Therapeutics", + "synonym": [] + }, + { + "id": "100019637", + "uri": "http://dx.doi.org/10.13039/100019637", + "name": "Horizon Therapeutics", + "synonym": [] + }, + { + "id": "100020174", + "uri": 
"http://dx.doi.org/10.13039/100020174", + "name": "Health Research Charities Ireland", + "synonym": [] + }, + { + "id": "100020202", + "uri": "http://dx.doi.org/10.13039/100020202", + "name": "UCD Foundation", + "synonym": [] + }, + { + "id": "100020233", + "uri": "http://dx.doi.org/10.13039/100020233", + "name": "Ireland Canada University Foundation", + "synonym": [] + }, + { + "id": "100022943", + "uri": "http://dx.doi.org/10.13039/100022943", + "name": "National Cancer Registry Ireland", + "synonym": [] + }, + { + "id": "501100001581", + "uri": "http://dx.doi.org/10.13039/501100001581", + "name": "Arts Council of Ireland", + "synonym": [] + }, + { + "id": "501100001582", + "uri": "http://dx.doi.org/10.13039/501100001582", + "name": "Centre for Ageing Research and Development in Ireland", + "synonym": [] + }, + { + "id": "501100001583", + "uri": "http://dx.doi.org/10.13039/501100001583", + "name": "Cystinosis Foundation Ireland", + "synonym": [] + }, + { + "id": "501100001584", + "uri": "http://dx.doi.org/10.13039/501100001584", + "name": "Department of Agriculture, Food and the Marine, Ireland", + "synonym": [] + }, + { + "id": "501100001586", + "uri": "http://dx.doi.org/10.13039/501100001586", + "name": "Department of Education and Skills, Ireland", + "synonym": [] + }, + { + "id": "501100001587", + "uri": "http://dx.doi.org/10.13039/501100001587", + "name": "Economic and Social Research Institute", + "synonym": [] + }, + { + "id": "501100001588", + "uri": "http://dx.doi.org/10.13039/501100001588", + "name": "Enterprise Ireland", + "synonym": [] + }, + { + "id": "501100001589", + "uri": "http://dx.doi.org/10.13039/501100001589", + "name": "Environmental Protection Agency", + "synonym": [] + }, + { + "id": "501100001591", + "uri": "http://dx.doi.org/10.13039/501100001591", + "name": "Heritage Council", + "synonym": [] + }, + { + "id": "501100001592", + "uri": "http://dx.doi.org/10.13039/501100001592", + "name": "Higher Education Authority", + "synonym": [] + }, 
+ { + "id": "501100001593", + "uri": "http://dx.doi.org/10.13039/501100001593", + "name": "Irish Cancer Society", + "synonym": [] + }, + { + "id": "501100001594", + "uri": "http://dx.doi.org/10.13039/501100001594", + "name": "Irish Heart Foundation", + "synonym": [] + }, + { + "id": "501100001595", + "uri": "http://dx.doi.org/10.13039/501100001595", + "name": "Irish Hospice Foundation", + "synonym": [] + }, + { + "id": "501100001596", + "uri": "http://dx.doi.org/10.13039/501100001596", + "name": "Irish Research Council for Science, Engineering and Technology", + "synonym": [] + }, + { + "id": "501100001597", + "uri": "http://dx.doi.org/10.13039/501100001597", + "name": "Irish Research Council for the Humanities and Social Sciences", + "synonym": [] + }, + { + "id": "501100001598", + "uri": "http://dx.doi.org/10.13039/501100001598", + "name": "Mental Health Commission", + "synonym": [] + }, + { + "id": "501100001600", + "uri": "http://dx.doi.org/10.13039/501100001600", + "name": "Research and Education Foundation, Sligo General Hospital", + "synonym": [] + }, + { + "id": "501100001601", + "uri": "http://dx.doi.org/10.13039/501100001601", + "name": "Royal Irish Academy", + "synonym": [] + }, + { + "id": "501100001602", + "uri": "http://dx.doi.org/10.13039/501100001602", + "name": "Science Foundation Ireland", + "synonym": [] + }, + { + "id": "501100001603", + "uri": "http://dx.doi.org/10.13039/501100001603", + "name": "Sustainable Energy Authority of Ireland", + "synonym": [] + }, + { + "id": "501100001604", + "uri": "http://dx.doi.org/10.13039/501100001604", + "name": "Teagasc", + "synonym": [] + }, + { + "id": "501100001627", + "uri": "http://dx.doi.org/10.13039/501100001627", + "name": "Marine Institute", + "synonym": [] + }, + { + "id": "501100001628", + "uri": "http://dx.doi.org/10.13039/501100001628", + "name": "Central Remedial Clinic", + "synonym": [] + }, + { + "id": "501100001629", + "uri": "http://dx.doi.org/10.13039/501100001629", + "name": "Royal Dublin 
Society", + "synonym": [] + }, + { + "id": "501100001630", + "uri": "http://dx.doi.org/10.13039/501100001630", + "name": "Dublin Institute for Advanced Studies", + "synonym": [] + }, + { + "id": "501100001631", + "uri": "http://dx.doi.org/10.13039/501100001631", + "name": "University College Dublin", + "synonym": [] + }, + { + "id": "501100001633", + "uri": "http://dx.doi.org/10.13039/501100001633", + "name": "National University of Ireland, Maynooth", + "synonym": [] + }, + { + "id": "501100001634", + "uri": "http://dx.doi.org/10.13039/501100001634", + "name": "University of Galway", + "synonym": [] + }, + { + "id": "501100001635", + "uri": "http://dx.doi.org/10.13039/501100001635", + "name": "University of Limerick", + "synonym": [] + }, + { + "id": "501100001636", + "uri": "http://dx.doi.org/10.13039/501100001636", + "name": "University College Cork", + "synonym": [] + }, + { + "id": "501100001637", + "uri": "http://dx.doi.org/10.13039/501100001637", + "name": "Trinity College Dublin", + "synonym": [] + }, + { + "id": "501100001638", + "uri": "http://dx.doi.org/10.13039/501100001638", + "name": "Dublin City University", + "synonym": [] + }, + { + "id": "501100002081", + "uri": "http://dx.doi.org/10.13039/501100002081", + "name": "Irish Research Council", + "synonym": [] + }, + { + "id": "501100002736", + "uri": "http://dx.doi.org/10.13039/501100002736", + "name": "Covidien", + "synonym": [] + }, + { + "id": "501100002755", + "uri": "http://dx.doi.org/10.13039/501100002755", + "name": "Brennan and Company", + "synonym": [] + }, + { + "id": "501100002919", + "uri": "http://dx.doi.org/10.13039/501100002919", + "name": "Cork Institute of Technology", + "synonym": [] + }, + { + "id": "501100002959", + "uri": "http://dx.doi.org/10.13039/501100002959", + "name": "Dublin City Council", + "synonym": [] + }, + { + "id": "501100003036", + "uri": "http://dx.doi.org/10.13039/501100003036", + "name": "Perrigo Company Charitable Foundation", + "synonym": [] + }, + { + "id": 
"501100003037", + "uri": "http://dx.doi.org/10.13039/501100003037", + "name": "Elan", + "synonym": [] + }, + { + "id": "501100003496", + "uri": "http://dx.doi.org/10.13039/501100003496", + "name": "HeyStaks Technologies", + "synonym": [] + }, + { + "id": "501100003553", + "uri": "http://dx.doi.org/10.13039/501100003553", + "name": "Gaelic Athletic Association", + "synonym": [] + }, + { + "id": "501100003840", + "uri": "http://dx.doi.org/10.13039/501100003840", + "name": "Irish Institute of Clinical Neuroscience", + "synonym": [] + }, + { + "id": "501100003956", + "uri": "http://dx.doi.org/10.13039/501100003956", + "name": "Aspect Medical Systems", + "synonym": [] + }, + { + "id": "501100004162", + "uri": "http://dx.doi.org/10.13039/501100004162", + "name": "Meath Foundation", + "synonym": [] + }, + { + "id": "501100004210", + "uri": "http://dx.doi.org/10.13039/501100004210", + "name": "Our Lady's Children's Hospital, Crumlin", + "synonym": [] + }, + { + "id": "501100004321", + "uri": "http://dx.doi.org/10.13039/501100004321", + "name": "Shire", + "synonym": [] + }, + { + "id": "501100004981", + "uri": "http://dx.doi.org/10.13039/501100004981", + "name": "Athlone Institute of Technology", + "synonym": [] + }, + { + "id": "501100006518", + "uri": "http://dx.doi.org/10.13039/501100006518", + "name": "Department of Communications, Energy and Natural Resources, Ireland", + "synonym": [] + }, + { + "id": "501100006553", + "uri": "http://dx.doi.org/10.13039/501100006553", + "name": "Collaborative Centre for Applied Nanotechnology", + "synonym": [] + }, + { + "id": "501100006759", + "uri": "http://dx.doi.org/10.13039/501100006759", + "name": "CLARITY Centre for Sensor Web Technologies", + "synonym": [] + }, + { + "id": "501100009246", + "uri": "http://dx.doi.org/10.13039/501100009246", + "name": "Technological University Dublin", + "synonym": [] + }, + { + "id": "501100009269", + "uri": "http://dx.doi.org/10.13039/501100009269", + "name": "Programme of Competitive Forestry 
Research for Development", + "synonym": [] + }, + { + "id": "501100009315", + "uri": "http://dx.doi.org/10.13039/501100009315", + "name": "Cystinosis Ireland", + "synonym": [] + }, + { + "id": "501100010808", + "uri": "http://dx.doi.org/10.13039/501100010808", + "name": "Geological Survey of Ireland", + "synonym": [] + }, + { + "id": "501100011030", + "uri": "http://dx.doi.org/10.13039/501100011030", + "name": "Alimentary Glycoscience Research Cluster", + "synonym": [] + }, + { + "id": "501100011031", + "uri": "http://dx.doi.org/10.13039/501100011031", + "name": "Alimentary Health", + "synonym": [] + }, + { + "id": "501100011103", + "uri": "http://dx.doi.org/10.13039/501100011103", + "name": "Rann\u00eds", + "synonym": [] + }, + { + "id": "501100012354", + "uri": "http://dx.doi.org/10.13039/501100012354", + "name": "Inland Fisheries Ireland", + "synonym": [] + }, + { + "id": "501100014384", + "uri": "http://dx.doi.org/10.13039/501100014384", + "name": "X-Bolt Orthopaedics", + "synonym": [] + }, + { + "id": "501100014710", + "uri": "http://dx.doi.org/10.13039/501100014710", + "name": "PrecisionBiotics Group", + "synonym": [] + }, + { + "id": "501100014827", + "uri": "http://dx.doi.org/10.13039/501100014827", + "name": "Dormant Accounts Fund", + "synonym": [] + }, + { + "id": "501100016041", + "uri": "http://dx.doi.org/10.13039/501100016041", + "name": "St Vincents Anaesthesia Foundation", + "synonym": [] + }, + { + "id": "501100017501", + "uri": "http://dx.doi.org/10.13039/501100017501", + "name": "FotoNation", + "synonym": [] + }, + { + "id": "501100018641", + "uri": "http://dx.doi.org/10.13039/501100018641", + "name": "Dairy Research Ireland", + "synonym": [] + }, + { + "id": "501100018839", + "uri": "http://dx.doi.org/10.13039/501100018839", + "name": "Irish Centre for High-End Computing", + "synonym": [] + }, + { + "id": "501100019905", + "uri": "http://dx.doi.org/10.13039/501100019905", + "name": "Galway University Foundation", + "synonym": [] + }, + { + "id": 
"501100020036", + "uri": "http://dx.doi.org/10.13039/501100020036", + "name": "Dystonia Ireland", + "synonym": [] + }, + { + "id": "501100020221", + "uri": "http://dx.doi.org/10.13039/501100020221", + "name": "Irish Motor Neurone Disease Association", + "synonym": [] + }, + { + "id": "501100020270", + "uri": "http://dx.doi.org/10.13039/501100020270", + "name": "Advanced Materials and Bioengineering Research", + "synonym": [] + }, + { + "id": "501100020403", + "uri": "http://dx.doi.org/10.13039/501100020403", + "name": "Irish Composites Centre", + "synonym": [] + }, + { + "id": "501100020425", + "uri": "http://dx.doi.org/10.13039/501100020425", + "name": "Irish Thoracic Society", + "synonym": [] + }, + { + "id": "501100021102", + "uri": "http://dx.doi.org/10.13039/501100021102", + "name": "Waterford Institute of Technology", + "synonym": [] + }, + { + "id": "501100021110", + "uri": "http://dx.doi.org/10.13039/501100021110", + "name": "Irish MPS Society", + "synonym": [] + }, + { + "id": "501100021525", + "uri": "http://dx.doi.org/10.13039/501100021525", + "name": "Insight SFI Research Centre for Data Analytics", + "synonym": [] + }, + { + "id": "501100021694", + "uri": "http://dx.doi.org/10.13039/501100021694", + "name": "Elan Pharma International", + "synonym": [] + }, + { + "id": "501100021838", + "uri": "http://dx.doi.org/10.13039/501100021838", + "name": "Royal College of Physicians of Ireland", + "synonym": [] + }, + { + "id": "501100022542", + "uri": "http://dx.doi.org/10.13039/501100022542", + "name": "Breakthrough Cancer Research", + "synonym": [] + }, + { + "id": "501100022610", + "uri": "http://dx.doi.org/10.13039/501100022610", + "name": "Breast Cancer Ireland", + "synonym": [] + }, + { + "id": "501100022728", + "uri": "http://dx.doi.org/10.13039/501100022728", + "name": "Munster Technological University", + "synonym": [] + }, + { + "id": "501100022729", + "uri": "http://dx.doi.org/10.13039/501100022729", + "name": "Institute of Technology, Tralee", + 
"synonym": [] + }, + { + "id": "501100023273", + "uri": "http://dx.doi.org/10.13039/501100023273", + "name": "HRB Clinical Research Facility Galway", + "synonym": [] + }, + { + "id": "501100023378", + "uri": "http://dx.doi.org/10.13039/501100023378", + "name": "Lauritzson Foundation", + "synonym": [] + }, + { + "id": "501100023551", + "uri": "http://dx.doi.org/10.13039/501100023551", + "name": "Cystic Fibrosis Ireland", + "synonym": [] + }, + { + "id": "501100023970", + "uri": "http://dx.doi.org/10.13039/501100023970", + "name": "Tyndall National Institute", + "synonym": [] + }, + { + "id": "501100024094", + "uri": "http://dx.doi.org/10.13039/501100024094", + "name": "Raidi\u00f3 Teilif\u00eds \u00c9ireann", + "synonym": [] + }, + { + "id": "501100024242", + "uri": "http://dx.doi.org/10.13039/501100024242", + "name": "Synthesis and Solid State Pharmaceutical Centre", + "synonym": [] + }, + { + "id": "501100024313", + "uri": "http://dx.doi.org/10.13039/501100024313", + "name": "Irish Rugby Football Union", + "synonym": [] + }, + { + "id": "100007490", + "uri": "http://dx.doi.org/10.13039/100007490", + "name": "Bausch and Lomb Ireland", + "synonym": [] + }, + { + "id": "100007819", + "uri": "http://dx.doi.org/10.13039/100007819", + "name": "Allergan", + "synonym": [] + }, + { + "id": "100010547", + "uri": "http://dx.doi.org/10.13039/100010547", + "name": "Irish Youth Justice Service", + "synonym": [] + }, + { + "id": "100012733", + "uri": "http://dx.doi.org/10.13039/100012733", + "name": "National Parks and Wildlife Service", + "synonym": [] + }, + { + "id": "100015278", + "uri": "http://dx.doi.org/10.13039/100015278", + "name": "Pfizer Healthcare Ireland", + "synonym": [] + }, + { + "id": "100017144", + "uri": "http://dx.doi.org/10.13039/100017144", + "name": "Shell E and P Ireland", + "synonym": [] + }, + { + "id": "100022895", + "uri": "http://dx.doi.org/10.13039/100022895", + "name": "Health Research Institute, University of Limerick", + "synonym": [] + }, + { + 
"id": "501100001599", + "uri": "http://dx.doi.org/10.13039/501100001599", + "name": "National Council for Forest Research and Development", + "synonym": [] + }, + { + "id": "501100006554", + "uri": "http://dx.doi.org/10.13039/501100006554", + "name": "IDA Ireland", + "synonym": [] + }, + { + "id": "501100011626", + "uri": "http://dx.doi.org/10.13039/501100011626", + "name": "Energy Policy Research Centre, Economic and Social Research Institute", + "synonym": [] + }, + { + "id": "501100014531", + "uri": "http://dx.doi.org/10.13039/501100014531", + "name": "Physical Education and Sport Sciences Department, University of Limerick", + "synonym": [] + }, + { + "id": "501100014745", + "uri": "http://dx.doi.org/10.13039/501100014745", + "name": "APC Microbiome Institute", + "synonym": [] + }, + { + "id": "501100014826", + "uri": "http://dx.doi.org/10.13039/501100014826", + "name": "ADAPT - Centre for Digital Content Technology", + "synonym": [] + }, + { + "id": "501100020570", + "uri": "http://dx.doi.org/10.13039/501100020570", + "name": "College of Medicine, Nursing and Health Sciences, National University of Ireland, Galway", + "synonym": [] + }, + { + "id": "501100020871", + "uri": "http://dx.doi.org/10.13039/501100020871", + "name": "Bernal Institute, University of Limerick", + "synonym": [] + }, + { + "id": "501100023852", + "uri": "http://dx.doi.org/10.13039/501100023852", + "name": "Moore Institute for Research in the Humanities and Social Studies, University of Galway", + "synonym": [] + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 9c63b709b..e0fdb9ce4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -16,6 +16,7 @@ import 
org.slf4j.{Logger, LoggerFactory} import java.util import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.io.Source import scala.util.matching.Regex case class CrossrefDT(doi: String, json: String, timestamp: Long) {} @@ -30,11 +31,22 @@ case class mappingAuthor( affiliation: Option[mappingAffiliation] ) {} +case class funderInfo(id:String,uri:String, name:String,synonym:List[String] ) {} + + + case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} case object Crossref2Oaf { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) + val irishFunder: List[funderInfo] = { + val s = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")).mkString + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(s) + json.extract[List[funderInfo]] + } + val mappingCrossrefType = Map( "book-section" -> "publication", "book" -> "publication", @@ -88,6 +100,11 @@ case object Crossref2Oaf { "report" -> "0017 Report" ) + def getIrishId(doi:String):Option[String] = { + val id =doi.split("/").last + irishFunder.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))).map(f => f.id) + } + def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -467,6 +484,14 @@ case object Crossref2Oaf { if (funders != null) funders.foreach(funder => { if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { + + if (getIrishId(funder.DOI.get).isDefined) { + val nsPrefix = getIrishId(funder.DOI.get).get.padTo(12, '_') + val targetId = getProjectId(nsPrefix, "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) + } + 
funder.DOI.get match { case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" | "10.13039/100010665" => diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index fbf6f72c0..7961376c5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -50,7 +50,7 @@ class CrossrefMappingTest { } } - def checkRelation(generatedOAF: List[Oaf]): Unit = { + def checkRelation(generatedOAF: List[Oaf]): Unit = { val rels: List[Relation] = generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] From eed9fe090220b93789f0d051303fb718814af23d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Oct 2023 12:31:17 +0200 Subject: [PATCH 35/57] code formatting --- .../dhp/oa/merge/GroupEntitiesSparkJob.java | 4 +- .../dhp/oa/dedup/SparkPropagateRelation.java | 72 +++---- .../group/GroupEntitiesSparkJobTest.java | 190 +++++++++--------- 3 files changed, 133 insertions(+), 133 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index 87510c108..99981bf6a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -33,7 +33,7 @@ import scala.Tuple2; public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); - private static final Encoder OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class); + private static final Encoder OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class); public static void 
main(String[] args) throws Exception { @@ -114,7 +114,7 @@ public class GroupEntitiesSparkJob { Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC)); // pivot on "_1" (classname of the entity) - // created columns containing only entities of the same class + // created columns containing only entities of the same class for (Map.Entry e : ModelSupport.entityTypes.entrySet()) { String entity = e.getKey().name(); Class entityClass = e.getValue(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index 739295c91..cb1c70059 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -67,60 +67,60 @@ public class SparkPropagateRelation extends AbstractSparkAction { log.info("graphOutputPath: '{}'", graphOutputPath); Dataset mergeRels = spark - .read() - .load(DedupUtility.createMergeRelPath(workingPath, "*", "*")) - .as(REL_BEAN_ENC); + .read() + .load(DedupUtility.createMergeRelPath(workingPath, "*", "*")) + .as(REL_BEAN_ENC); // Dataset idsToMerge = mergeRels - .where(col("relClass").equalTo(ModelConstants.MERGES)) - .select(col("source").as("dedupID"), col("target").as("mergedObjectID")) - .distinct(); + .where(col("relClass").equalTo(ModelConstants.MERGES)) + .select(col("source").as("dedupID"), col("target").as("mergedObjectID")) + .distinct(); Dataset allRels = spark - .read() - .schema(REL_BEAN_ENC.schema()) - .json(graphBasePath + "/relation"); + .read() + .schema(REL_BEAN_ENC.schema()) + .json(graphBasePath + "/relation"); Dataset dedupedRels = allRels - .joinWith(idsToMerge, allRels.col("source").equalTo(idsToMerge.col("mergedObjectID")), "left_outer") - .joinWith(idsToMerge, col("_1.target").equalTo(idsToMerge.col("mergedObjectID")), 
"left_outer") - .select("_1._1", "_1._2.dedupID", "_2.dedupID") - .as(Encoders.tuple(REL_BEAN_ENC, Encoders.STRING(), Encoders.STRING())) - .map((MapFunction, Relation>) t -> { - Relation rel = t._1(); - String newSource = t._2(); - String newTarget = t._3(); + .joinWith(idsToMerge, allRels.col("source").equalTo(idsToMerge.col("mergedObjectID")), "left_outer") + .joinWith(idsToMerge, col("_1.target").equalTo(idsToMerge.col("mergedObjectID")), "left_outer") + .select("_1._1", "_1._2.dedupID", "_2.dedupID") + .as(Encoders.tuple(REL_BEAN_ENC, Encoders.STRING(), Encoders.STRING())) + .map((MapFunction, Relation>) t -> { + Relation rel = t._1(); + String newSource = t._2(); + String newTarget = t._3(); - if (rel.getDataInfo() == null) { - rel.setDataInfo(new DataInfo()); - } + if (rel.getDataInfo() == null) { + rel.setDataInfo(new DataInfo()); + } - if (newSource != null || newTarget != null) { - rel.getDataInfo().setDeletedbyinference(false); + if (newSource != null || newTarget != null) { + rel.getDataInfo().setDeletedbyinference(false); - if (newSource != null) - rel.setSource(newSource); + if (newSource != null) + rel.setSource(newSource); - if (newTarget != null) - rel.setTarget(newTarget); - } + if (newTarget != null) + rel.setTarget(newTarget); + } - return rel; - }, REL_BEAN_ENC); + return rel; + }, REL_BEAN_ENC); // ids of records that are both not deletedbyinference and not invisible Dataset ids = validIds(spark, graphBasePath); // filter relations that point to valid records, can force them to be visible Dataset cleanedRels = dedupedRels - .join(ids, col("source").equalTo(ids.col("id")), "leftsemi") - .join(ids, col("target").equalTo(ids.col("id")), "leftsemi") - .as(REL_BEAN_ENC) - .map((MapFunction) r -> { - r.getDataInfo().setInvisible(false); - return r; - }, REL_KRYO_ENC); + .join(ids, col("source").equalTo(ids.col("id")), "leftsemi") + .join(ids, col("target").equalTo(ids.col("id")), "leftsemi") + .as(REL_BEAN_ENC) + .map((MapFunction) r -> { + 
r.getDataInfo().setInvisible(false); + return r; + }, REL_KRYO_ENC); Dataset distinctRels = cleanedRels .groupByKey( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java index b878e778e..0887adf45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -1,14 +1,14 @@ package eu.dnetlib.dhp.oa.graph.group; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.utils.DHPUtils; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -18,108 +18,108 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.assertEquals; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; +import 
eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.DHPUtils; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class GroupEntitiesSparkJobTest { - private static SparkSession spark; + private static SparkSession spark; - private static ObjectMapper mapper = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private static ObjectMapper mapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - private static Path workingDir; - private Path dataInputPath; + private static Path workingDir; + private Path dataInputPath; - private Path checkpointPath; + private Path checkpointPath; - private Path outputPath; + private Path outputPath; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); - SparkConf conf = new SparkConf(); - conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); - conf.setMaster("local"); - conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - spark = SparkSession.builder().config(conf).getOrCreate(); - } + SparkConf conf = new SparkConf(); + conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); + conf.setMaster("local"); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + spark = SparkSession.builder().config(conf).getOrCreate(); + } - @BeforeEach - public void beforeEach() throws IOException, URISyntaxException { - dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); 
- checkpointPath = workingDir.resolve("grouped_entity"); - outputPath = workingDir.resolve("dispatched_entity"); - } + @BeforeEach + public void beforeEach() throws IOException, URISyntaxException { + dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + checkpointPath = workingDir.resolve("grouped_entity"); + outputPath = workingDir.resolve("dispatched_entity"); + } - @AfterAll - public static void afterAll() throws IOException { - spark.stop(); - FileUtils.deleteDirectory(workingDir.toFile()); - } + @AfterAll + public static void afterAll() throws IOException { + spark.stop(); + FileUtils.deleteDirectory(workingDir.toFile()); + } - @Test - @Order(1) - void testGroupEntities() throws Exception { - GroupEntitiesSparkJob.main(new String[]{ - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-graphInputPath", - dataInputPath.toString(), - "-checkpointPath", - checkpointPath.toString(), - "-outputPath", - outputPath.toString(), - "-filterInvisible", - Boolean.FALSE.toString() - }); + @Test + @Order(1) + void testGroupEntities() throws Exception { + GroupEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-graphInputPath", + dataInputPath.toString(), + "-checkpointPath", + checkpointPath.toString(), + "-outputPath", + outputPath.toString(), + "-filterInvisible", + Boolean.FALSE.toString() + }); - Dataset checkpointTable = spark - .read() - .load(checkpointPath.toString()) - .selectExpr("COALESCE(*)") - .as(Encoders.kryo(OafEntity.class)); + Dataset checkpointTable = spark + .read() + .load(checkpointPath.toString()) + .selectExpr("COALESCE(*)") + .as(Encoders.kryo(OafEntity.class)); + assertEquals( + 1, + checkpointTable + .filter( + (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" + .equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) + .count()); - assertEquals( - 1, - checkpointTable - 
.filter( - (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" - .equals(r.getId()) && - r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) - .count()); + Dataset output = spark + .read() + .textFile( + DHPUtils + .toSeq( + HdfsSupport + .listFiles(outputPath.toString(), spark.sparkContext().hadoopConfiguration()))) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); - - Dataset output = spark - .read() - .textFile( - DHPUtils - .toSeq( - HdfsSupport - .listFiles(outputPath.toString(), spark.sparkContext().hadoopConfiguration()))) - .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); - - assertEquals(3, output.count()); - assertEquals( - 2, - output - .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) - .filter((FilterFunction) s -> s.equals("publication")) - .count()); - assertEquals( - 1, - output - .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) - .filter((FilterFunction) s -> s.equals("dataset")) - .count()); - } -} \ No newline at end of file + assertEquals(3, output.count()); + assertEquals( + 2, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("publication")) + .count()); + assertEquals( + 1, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("dataset")) + .count()); + } +} From f759b18bca5260ff61a4603a023aa484585e05fe Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Oct 2023 13:43:20 +0200 Subject: [PATCH 36/57] [SWH] aligned parameter name --- .../resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml 
b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml index 64dc0d2aa..c625fcb5b 100644 --- a/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-swh/src/main/resources/eu/dnetlib/dhp/swh/oozie_app/workflow.xml @@ -47,7 +47,7 @@ Limit on the number of repo URLs to use (Optional); for debug purposes - resume + resumeFrom Variable that indicates the step to start from @@ -80,8 +80,8 @@ - ${wf:conf('startFrom') eq 'collect-software-repository-urls'} - ${wf:conf('startFrom') eq 'create-swh-actionsets'} + ${wf:conf('resumeFrom') eq 'collect-software-repository-urls'} + ${wf:conf('resumeFrom') eq 'create-swh-actionsets'} From 858931ccb63ee47849129686cbfe3406cee4f997 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Oct 2023 14:03:33 +0200 Subject: [PATCH 37/57] [SWH] compress the output actionset --- .../src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java index 2239244d6..2691d4b7e 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java @@ -11,6 +11,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; @@ -81,7 +82,7 @@ public class PrepareSWHActionsets { JavaPairRDD softwareRDD = prepareActionsets(spark, inputPath, softwareInputPath); softwareRDD .saveAsHadoopFile( - outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); + outputPath, Text.class, Text.class, 
SequenceFileOutputFormat.class, GzipCodec.class); }); } From 2f3cf6d0e700b3a96c493e1d81cb2274bb45bad2 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 6 Oct 2023 12:35:54 +0200 Subject: [PATCH 38/57] Fix cleaning of Pmid where parsing of numbers stopped at first not leading 0' character --- .../eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java | 4 ++-- .../dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java index d0f5a3b27..c0c451b88 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java @@ -7,7 +7,7 @@ import java.util.regex.Pattern; // https://researchguides.stevens.edu/c.php?g=442331&p=6577176 public class PmidCleaningRule { - public static final Pattern PATTERN = Pattern.compile("[1-9]{1,8}"); + public static final Pattern PATTERN = Pattern.compile("0*(\\d{1,8})"); public static String clean(String pmid) { String s = pmid @@ -17,7 +17,7 @@ public class PmidCleaningRule { final Matcher m = PATTERN.matcher(s); if (m.find()) { - return m.group(); + return m.group(1); } return ""; } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java index 9562adf7e..295eac85f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java @@ -9,10 +9,16 @@ class PmidCleaningRuleTest { @Test void testCleaning() { + // leading zeros are removed assertEquals("1234", PmidCleaningRule.clean("01234")); + // tolerant to spaces in the middle assertEquals("1234567", 
PmidCleaningRule.clean("0123 4567")); + // stop parsing at first not numerical char assertEquals("123", PmidCleaningRule.clean("0123x4567")); + // invalid id leading to empty result assertEquals("", PmidCleaningRule.clean("abc")); + // valid id with zeroes in the number + assertEquals("20794075", PmidCleaningRule.clean("20794075")); } } From 46034630cf62fcb5fcd66366a9be48f62f363d0b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Oct 2023 14:42:02 +0200 Subject: [PATCH 39/57] [OC] compress the output actionset --- .../actionmanager/opencitations/CreateActionSetSparkJob.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index 2db756a94..ed0a54efa 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -10,6 +10,7 @@ import java.util.*; import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; @@ -27,7 +28,6 @@ import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -100,7 +100,7 @@ public class 
CreateActionSetSparkJob implements Serializable { getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, COCI) .union(getTextTextJavaPairRDD(spark, inputPath, shouldDuplicateRels, POCI)) - .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); } private static JavaPairRDD getTextTextJavaPairRDD(SparkSession spark, String inputPath, From 84a58802ab51ea5a496b14059799a78366d74d07 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Oct 2023 14:48:05 +0200 Subject: [PATCH 40/57] [OC] using the common pid cleaning function --- .../opencitations/CreateActionSetSparkJob.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index ed0a54efa..a367ba852 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.Serializable; import java.util.*; +import eu.dnetlib.dhp.schema.oaf.utils.*; import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; @@ -29,10 +30,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; -import 
eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; @@ -136,19 +133,19 @@ public class CreateActionSetSparkJob implements Serializable { prefix = DOI_PREFIX; citing = prefix + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue(PidType.doi.toString(), value.getCiting())); + .md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getCiting())); cited = prefix + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue(PidType.doi.toString(), value.getCited())); + .md5(PidCleaner.normalizePidValue(PidType.doi.toString(), value.getCited())); break; case POCI: prefix = PMID_PREFIX; citing = prefix + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue(PidType.pmid.toString(), value.getCiting())); + .md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCiting())); cited = prefix + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue(PidType.pmid.toString(), value.getCited())); + .md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), value.getCited())); break; default: throw new IllegalStateException("Invalid prefix: " + p); From ef833840c357f97595ce563393bff3331f0d987d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 6 Oct 2023 15:48:18 +0200 Subject: [PATCH 41/57] [Doiboost] removed linkage to SFI unidentified project --- .../eu/dnetlib/dhp/doiboost/crossref/irish_funder.json | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json index 6c6912c2e..15eb1b711 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json @@ -433,12 +433,6 @@ "name": "Royal Irish Academy", "synonym": [] }, - { - "id": "501100001602", - "uri": 
"http://dx.doi.org/10.13039/501100001602", - "name": "Science Foundation Ireland", - "synonym": [] - }, { "id": "501100001603", "uri": "http://dx.doi.org/10.13039/501100001603", From 489a082f044cc89215f2183eb06ff764826f8578 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Mon, 9 Oct 2023 14:00:50 +0300 Subject: [PATCH 42/57] Update step16-createIndicatorsTables.sql Change scripts for gold, hybrid, bronze indicators --- .../scripts/step16-createIndicatorsTables.sql | 353 ++++++++++++------ 1 file changed, 245 insertions(+), 108 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index ae95727a6..6af486340 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1,4 +1,6 @@ -- Sprint 1 ---- +drop table if exists ${stats_db_name}.indi_pub_green_oa purge; + create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as select distinct p.id, coalesce(green_oa, 0) as green_oa from ${stats_db_name}.publication p @@ -12,6 +14,8 @@ from ${stats_db_name}.publication p or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp on p.id= tmp.id; +drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; + create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as select distinct p.id, coalesce(grey_lit, 0) as grey_lit from ${stats_db_name}.publication p @@ -23,6 +27,8 @@ from ${stats_db_name}.publication p not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; +drop table 
if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; + create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref from ${stats_db_name}.publication p @@ -33,6 +39,8 @@ from ${stats_db_name}.publication p on tmp.id=p.id; -- Sprint 2 ---- +drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge; + create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license from ${stats_db_name}.result r @@ -41,6 +49,8 @@ left outer join (select r.id, license.type as lic from ${stats_db_name}.result r where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp on r.id= tmp.id; +drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; + create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url from ${stats_db_name}.result r @@ -50,10 +60,14 @@ from ${stats_db_name}.result r WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp on r.id= tmp.id; +drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; + create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract from ${stats_db_name}.publication; +drop table if exists ${stats_db_name}.indi_result_with_orcid purge; + create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from ${stats_db_name}.result r @@ -61,6 +75,9 @@ from ${stats_db_name}.result r on r.id= tmp.id; ---- Sprint 3 ---- + +drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; + 
create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as select distinct r.result as id, coalesce(fundref, 0) as fundref from ${stats_db_name}.project_results r @@ -79,6 +96,8 @@ from ${stats_db_name}.project_results r create TEMPORARY TABLE ${stats_db_name}.tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null; +drop table if exists ${stats_db_name}.indi_result_org_collab purge; + create table if not exists ${stats_db_name}.indi_result_org_collab stored as parquet as select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations from ${stats_db_name}.tmp as o1 @@ -91,6 +110,8 @@ create TEMPORARY TABLE ${stats_db_name}.tmp AS select distinct ro.organization organization, ro.id, o.name, o.country from ${stats_db_name}.result_organization ro join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; +drop table if exists ${stats_db_name}.indi_result_org_country_collab purge; + create table if not exists ${stats_db_name}.indi_result_org_country_collab stored as parquet as select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations from ${stats_db_name}.tmp as o1 join ${stats_db_name}.tmp as o2 on o1.id=o2.id @@ -103,6 +124,8 @@ create TEMPORARY TABLE ${stats_db_name}.tmp AS select o.id organization, o.name, ro.project as project from ${stats_db_name}.organization o join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; +drop table if exists ${stats_db_name}.indi_project_collab_org purge; + create table if not exists ${stats_db_name}.indi_project_collab_org stored as parquet as select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations from 
${stats_db_name}.tmp as o1 @@ -117,6 +140,8 @@ select o.id organization, o.name, o.country , ro.project as project from ${stat join ${stats_db_name}.organization_projects ro on o.id=ro.id and o.country <> 'UNKNOWN' and o.name is not null; +drop table if exists ${stats_db_name}.indi_project_collab_org_country purge; + create table if not exists ${stats_db_name}.indi_project_collab_org_country stored as parquet as select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations from ${stats_db_name}.tmp as o1 @@ -126,6 +151,8 @@ group by o1.organization, o2.country, o1.name; drop table ${stats_db_name}.tmp purge; +drop table if exists ${stats_db_name}.indi_funder_country_collab purge; + create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from ${stats_db_name}.organization_projects op join ${stats_db_name}.organization o on o.id=op.id @@ -142,6 +169,8 @@ select distinct country, ro.id as result from ${stats_db_name}.organization o join ${stats_db_name}.result_organization ro on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; +drop table if exists ${stats_db_name}.indi_result_country_collab purge; + create table if not exists ${stats_db_name}.indi_result_country_collab stored as parquet as select o1.country country1, o2.country country2, count(o1.result) as collaborations from ${stats_db_name}.tmp as o1 @@ -152,6 +181,8 @@ group by o1.country, o2.country; drop table ${stats_db_name}.tmp purge; ---- Sprint 4 ---- +drop table if exists ${stats_db_name}.indi_pub_diamond purge; + create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal from ${stats_db_name}.publication_datasources pd @@ -162,6 +193,8 @@ from ${stats_db_name}.publication_datasources pd and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and 
ps.has_apc=false) tmp on pd.id=tmp.id; +drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; + create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative from ${stats_db_name}.publication pd @@ -172,6 +205,8 @@ from ${stats_db_name}.publication pd and ps.is_transformative_journal=true) tmp on pd.id=tmp.id; +drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; + create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from ${stats_db_name}.result_instance ri left outer join @@ -183,10 +218,14 @@ select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_op on tmp.id=ri.id; ---- Sprint 5 ---- +drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; + create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; ---- Sprint 6 ---- +drop table if exists ${stats_db_name}.indi_pub_downloads purge; + create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet as SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats join ${stats_db_name}.publication on result_id=id @@ -196,6 +235,8 @@ order by no_downloads desc; --ANALYZE TABLE ${stats_db_name}.indi_pub_downloads COMPUTE STATISTICS; +drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; + create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats join ${stats_db_name}.publication on result_id=id @@ -203,12 +244,16 @@ where downloads>0 GROUP BY result_id, repository_id order by result_id; +drop table 
if exists ${stats_db_name}.indi_pub_downloads_year purge; + create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us join ${stats_db_name}.publication on result_id=id where downloads>0 GROUP BY result_id, substring(us.`date`, 1,4); +drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; + create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us join ${stats_db_name}.publication on result_id=id @@ -216,54 +261,81 @@ where downloads>0 GROUP BY result_id, repository_id, substring(us.`date`, 1,4); ---- Sprint 7 ---- +drop table if exists ${stats_db_name}.indi_pub_gold_oa purge; + +--create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as +-- WITH gold_oa AS ( SELECT +-- issn_l, +-- journal_is_in_doaj, +-- journal_is_oa, +-- issn_1 as issn +-- FROM +-- STATS_EXT.oa_journals +-- WHERE +-- issn_1 != "" +-- UNION +-- ALL SELECT +-- issn_l, +-- journal_is_in_doaj, +-- journal_is_oa, +-- issn_2 as issn +-- FROM +-- STATS_EXT.oa_journals +-- WHERE +-- issn_2 != "" ), issn AS ( SELECT +-- * +-- FROM +--( SELECT +-- id, +-- issn_printed as issn +-- FROM +-- ${stats_db_name}.datasource +-- WHERE +-- issn_printed IS NOT NULL +-- UNION ALL +-- SELECT +-- id, +-- issn_online as issn +-- FROM +-- ${stats_db_name}.datasource +-- WHERE +-- issn_online IS NOT NULL or id like '%doajarticles%') as issn +-- WHERE +-- LENGTH(issn) > 7) +--SELECT +-- DISTINCT pd.id, coalesce(is_gold, 0) as is_gold +--FROM +-- ${stats_db_name}.publication_datasources pd +-- left outer join( +-- select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd +-- JOIN issn on 
issn.id=pd.datasource +-- JOIN gold_oa on issn.issn = gold_oa.issn) tmp +-- on pd.id=tmp.id; + create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as - WITH gold_oa AS ( SELECT - issn_l, - journal_is_in_doaj, - journal_is_oa, - issn_1 as issn - FROM - STATS_EXT.oa_journals - WHERE - issn_1 != "" - UNION - ALL SELECT - issn_l, - journal_is_in_doaj, - journal_is_oa, - issn_2 as issn - FROM - STATS_EXT.oa_journals - WHERE - issn_2 != "" ), issn AS ( SELECT - * - FROM -( SELECT - id, - issn_printed as issn - FROM - ${stats_db_name}.datasource - WHERE - issn_printed IS NOT NULL - UNION ALL - SELECT - id, - issn_online as issn - FROM - ${stats_db_name}.datasource - WHERE - issn_online IS NOT NULL or id like '%doajarticles%') as issn - WHERE - LENGTH(issn) > 7) -SELECT - DISTINCT pd.id, coalesce(is_gold, 0) as is_gold -FROM - ${stats_db_name}.publication_datasources pd - left outer join( - select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd - JOIN issn on issn.id=pd.datasource - JOIN gold_oa on issn.issn = gold_oa.issn) tmp - on pd.id=tmp.id; +with gold_oa as ( +SELECT issn,issn_l from stats_ext.issn_gold_oa_dataset_v5), +issn AS (SELECT * FROM +(SELECT id,issn_printed as issn FROM ${stats_db_name}.datasource +WHERE issn_printed IS NOT NULL +UNION ALL +SELECT id, issn_online as issn FROM ${stats_db_name}.datasource +WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn +WHERE LENGTH(issn) > 7), +alljournals AS(select issn, issn_l from stats_ext.alljournals +where journal_is_in_doaj=true or journal_is_oa=true) +SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold +FROM ${stats_db_name}.publication_datasources pd +left outer join ( +select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd +JOIN issn on issn.id=pd.datasource +JOIN gold_oa on issn.issn = gold_oa.issn +join alljournals on issn.issn=alljournals.issn +left outer join ${stats_db_name}.result_instance ri on ri.id=pd.id +and 
ri.accessright!='Closed Access' and ri.accessright_uw='gold') tmp +on pd.id=tmp.id; + +drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as WITH hybrid_oa AS ( @@ -296,56 +368,71 @@ FROM ${stats_db_name}.publication_datasources pd JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; +drop table if exists ${stats_db_name}.indi_pub_hybrid purge; + +--create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as +-- WITH gold_oa AS ( SELECT +-- issn_l, +-- journal_is_in_doaj, +-- journal_is_oa, +-- issn_1 as issn, +-- has_apc +-- FROM +-- STATS_EXT.oa_journals +-- WHERE +-- issn_1 != "" +-- UNION +-- ALL SELECT +-- issn_l, +-- journal_is_in_doaj, +-- journal_is_oa, +-- issn_2 as issn, +-- has_apc +-- FROM +-- STATS_EXT.oa_journals +-- WHERE +-- issn_2 != "" ), issn AS ( SELECT +-- * +-- FROM +--( SELECT +-- id, +-- issn_printed as issn +-- FROM +-- ${stats_db_name}.datasource +-- WHERE +-- issn_printed IS NOT NULL +-- UNION ALL +-- SELECT +-- id, +-- issn_online as issn +-- FROM +-- ${stats_db_name}.datasource +-- WHERE +-- issn_online IS NOT NULL or id like '%doajarticles%') as issn +-- WHERE +-- LENGTH(issn) > 7) +--select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid +--from ${stats_db_name}.publication_datasources pd +-- left outer join ( +-- select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd +-- join ${stats_db_name}.datasource d on d.id=pd.datasource +-- join issn on issn.id=pd.datasource +-- join gold_oa on issn.issn=gold_oa.issn +-- where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp +-- on pd.id=tmp.id; + create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as - WITH gold_oa AS ( SELECT - issn_l, - journal_is_in_doaj, - journal_is_oa, - issn_1 as issn, - has_apc - FROM - 
STATS_EXT.oa_journals - WHERE - issn_1 != "" - UNION - ALL SELECT - issn_l, - journal_is_in_doaj, - journal_is_oa, - issn_2 as issn, - has_apc - FROM - STATS_EXT.oa_journals - WHERE - issn_2 != "" ), issn AS ( SELECT - * - FROM -( SELECT - id, - issn_printed as issn - FROM - ${stats_db_name}.datasource - WHERE - issn_printed IS NOT NULL - UNION ALL - SELECT - id, - issn_online as issn - FROM - ${stats_db_name}.datasource - WHERE - issn_online IS NOT NULL or id like '%doajarticles%') as issn - WHERE - LENGTH(issn) > 7) -select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid -from ${stats_db_name}.publication_datasources pd - left outer join ( - select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd - join ${stats_db_name}.datasource d on d.id=pd.datasource - join issn on issn.id=pd.datasource - join gold_oa on issn.issn=gold_oa.issn - where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp - on pd.id=tmp.id; +select pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication_datasources pd +left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd +join ${stats_db_name}.datasource d on pd.datasource=d.id +join ${stats_db_name}.result_instance ri on ri.id=pd.id +join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id +join ${stats_db_name}.result_accessroute ra on ra.id=pd.id +where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold' +or indi_gold.is_gold=0) and (ra.accessroute='hybrid' or ri.license is not null)) tmp +on pd.id=tmp.id; + +drop table if exists ${stats_db_name}.indi_org_fairness purge; create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as --return results with PIDs, and rich metadata group by organization @@ -381,6 +468,8 @@ select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name where cast(year as int)>2003 group by ro.organization; +drop table if 
exists ${stats_db_name}.indi_org_fairness_pub_pr purge; + create table if not exists ${stats_db_name}.indi_org_fairness_pub_pr stored as parquet as select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness from ${stats_db_name}.allresults ar @@ -400,6 +489,8 @@ CREATE TEMPORARY TABLE ${stats_db_name}.allresults as select year, ro.organizati where cast(year as int)>2003 group by ro.organization, year; +drop table if exists ${stats_db_name}.indi_org_fairness_pub_year purge; + create table if not exists ${stats_db_name}.indi_org_fairness_pub_year stored as parquet as select allresults.year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness from ${stats_db_name}.allresults @@ -422,6 +513,8 @@ CREATE TEMPORARY TABLE ${stats_db_name}.allresults as where cast(year as int)>2003 group by ro.organization; +drop table if exists ${stats_db_name}.indi_org_fairness_pub purge; + create table if not exists ${stats_db_name}.indi_org_fairness_pub as select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness from ${stats_db_name}.allresults ar join ${stats_db_name}.result_fair rf @@ -443,6 +536,8 @@ CREATE TEMPORARY TABLE ${stats_db_name}.allresults as where cast(year as int)>2003 group by ro.organization, year; +drop table if exists ${stats_db_name}.indi_org_fairness_year purge; + create table if not exists ${stats_db_name}.indi_org_fairness_year stored as parquet as select cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness from ${stats_db_name}.allresults @@ -464,6 +559,8 @@ CREATE TEMPORARY TABLE ${stats_db_name}.allresults as where cast(year as int) >2003 group by ro.organization, year; +drop table if exists ${stats_db_name}.indi_org_findable_year purge; + create table if not exists ${stats_db_name}.indi_org_findable_year stored as parquet as select cast(allresults.year as int) year, allresults.organization, 
result_with_pid.no_result_with_pid/allresults.no_allresults org_findable from ${stats_db_name}.allresults @@ -485,6 +582,8 @@ select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name where cast(year as int) >2003 group by ro.organization; +drop table if exists ${stats_db_name}.indi_org_findable purge; + create table if not exists ${stats_db_name}.indi_org_findable stored as parquet as select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable from ${stats_db_name}.allresults @@ -549,6 +648,8 @@ select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsof from ${stats_db_name}.allsoftware join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization; +drop table if exists ${stats_db_name}.indi_org_openess purge; + create table if not exists ${stats_db_name}.indi_org_openess stored as parquet as select allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) @@ -624,6 +725,7 @@ select allsoftware.year, software_oa.organization, software_oa.no_oasoftware/all from ${stats_db_name}.allsoftware join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization where cast(allsoftware.year as INT)=cast(software_oa.year as int); +drop table if exists ${stats_db_name}.indi_org_openess_year purge; create table if not exists ${stats_db_name}.indi_org_openess_year stored as parquet as select cast(allpubsshare.year as int) year, allpubsshare.organization, @@ -647,6 +749,8 @@ DROP TABLE ${stats_db_name}.allpubsshare purge; DROP TABLE ${stats_db_name}.alldatasetssshare purge; DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +drop table if exists ${stats_db_name}.indi_pub_has_preprint purge; + create table if not exists ${stats_db_name}.indi_pub_has_preprint stored as parquet as select distinct p.id, coalesce(has_preprint, 0) as has_preprint from ${stats_db_name}.publication_classifications p 
@@ -655,6 +759,7 @@ from ${stats_db_name}.publication_classifications p from ${stats_db_name}.publication_classifications p where p.type='Preprint') tmp on p.id= tmp.id; +drop table if exists ${stats_db_name}.indi_pub_in_subscribed purge; create table if not exists ${stats_db_name}.indi_pub_in_subscribed stored as parquet as select distinct p.id, coalesce(is_subscription, 0) as is_subscription @@ -667,6 +772,8 @@ from ${stats_db_name}.publication p where g.is_gold=0 and h.is_hybrid=0 and t.is_transformative=0) tmp on p.id=tmp.id; +drop table if exists ${stats_db_name}.indi_result_with_pid purge; + create table if not exists ${stats_db_name}.indi_result_with_pid as select distinct p.id, coalesce(result_with_pid, 0) as result_with_pid from ${stats_db_name}.result p @@ -679,6 +786,8 @@ CREATE TEMPORARY TABLE ${stats_db_name}.pub_fos_totals as select rf.id, count(distinct lvl3) totals from ${stats_db_name}.result_fos rf group by rf.id; +drop table if exists ${stats_db_name}.indi_pub_interdisciplinarity purge; + create table if not exists ${stats_db_name}.indi_pub_interdisciplinarity as select distinct p.id as id, coalesce(is_interdisciplinary, 0) as is_interdisciplinary @@ -689,18 +798,31 @@ where totals>1) tmp on p.id=tmp.id; drop table ${stats_db_name}.pub_fos_totals purge; -create table if not exists ${stats_db_name}.indi_pub_bronze_oa stored as parquet as -select distinct p.id, coalesce(is_bronze_oa,0) as is_bronze_oa -from ${stats_db_name}.publication p -left outer join -(select p.id, 1 as is_bronze_oa from ${stats_db_name}.publication p -join ${stats_db_name}.indi_result_has_cc_licence cc on cc.id=p.id -join ${stats_db_name}.indi_pub_gold_oa ga on ga.id=p.id -join ${stats_db_name}.result_instance ri on ri.id=p.id -join ${stats_db_name}.datasource d on d.id=ri.hostedby -where cc.has_cc_license=0 and ga.is_gold=0 -and (d.type='Journal' or d.type='Journal Aggregator/Publisher') -and ri.accessright='Open Access') tmp on tmp.id=p.id; +drop table if exists 
${stats_db_name}.indi_pub_bronze_oa purge; + +--create table if not exists ${stats_db_name}.indi_pub_bronze_oa stored as parquet as +--select distinct p.id, coalesce(is_bronze_oa,0) as is_bronze_oa +--from ${stats_db_name}.publication p +--left outer join +--(select p.id, 1 as is_bronze_oa from ${stats_db_name}.publication p +--join ${stats_db_name}.indi_result_has_cc_licence cc on cc.id=p.id +--join ${stats_db_name}.indi_pub_gold_oa ga on ga.id=p.id +--join ${stats_db_name}.result_instance ri on ri.id=p.id +--join ${stats_db_name}.datasource d on d.id=ri.hostedby +--where cc.has_cc_license=0 and ga.is_gold=0 +--and (d.type='Journal' or d.type='Journal Aggregator/Publisher') +--and ri.accessright='Open Access') tmp on tmp.id=p.id; + +create table ${stats_db_name}.indi_pub_bronze stored as parquet as +select pd.id,coalesce(is_bronze_oa,0) is_bronze_oa from ${stats_db_name}.publication_datasources pd +left outer join (select pd.id, 1 as is_bronze_oa from ${stats_db_name}.publication_datasources pd +join ${stats_db_name}.datasource d on pd.datasource=d.id +join ${stats_db_name}.result_instance ri on ri.id=pd.id +join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id +join ${stats_db_name}.result_accessroute ra on ra.id=pd.id +where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold' +or indi_gold.is_gold=0) and (ra.accessroute='bronze' or ri.license is null)) tmp +on pd.id=tmp.id; CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as select p.id project_id, acronym, r.id result_id, r.year, p.end_year @@ -709,6 +831,8 @@ join ${stats_db_name}.result_projects rp on p.id=rp.project join ${stats_db_name}.result r on r.id=rp.id where p.end_year is NOT NULL and r.year is not null; +drop table if exists ${stats_db_name}.indi_is_project_result_after purge; + create table if not exists ${stats_db_name}.indi_is_project_result_after stored as parquet as select pry.project_id, pry.acronym, pry.result_id, 
coalesce(is_project_result_after, 0) as is_project_result_after @@ -719,6 +843,8 @@ where pry.year>pry.end_year) tmp on pry.result_id=tmp.result_id; drop table ${stats_db_name}.project_year_result_year purge; +drop table ${stats_db_name}.indi_is_funder_plan_s purge; + create table if not exists ${stats_db_name}.indi_is_funder_plan_s stored as parquet as select distinct f.id, f.name, coalesce(is_funder_plan_s, 0) as is_funder_plan_s from ${stats_db_name}.funder f @@ -727,6 +853,7 @@ from ${stats_db_name}.funder f on f.name= tmp.name; --Funder Fairness +drop table ${stats_db_name}.indi_funder_fairness purge; create table if not exists ${stats_db_name}.indi_funder_fairness stored as parquet as with result_fair as @@ -745,6 +872,8 @@ from allresults join result_fair on result_fair.funder=allresults.funder; --RIs Fairness +drop table ${stats_db_name}.indi_ris_fairness purge; + create table if not exists ${stats_db_name}.indi_ris_fairness stored as parquet as with result_contexts as (select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc @@ -830,6 +959,8 @@ select software_oa.funder, software_oa.no_oasoftware/allsoftware.no_allsoftware from ${stats_db_name}.allsoftware join ${stats_db_name}.software_oa on allsoftware.funder=software_oa.funder; +drop table ${stats_db_name}.indi_funder_openess purge; + create table if not exists ${stats_db_name}.indi_funder_openess stored as parquet as select allpubsshare.funder, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) @@ -916,6 +1047,8 @@ select software_oa.ri_initiative, software_oa.no_oasoftware/allsoftware.no_allso from ${stats_db_name}.allsoftware join ${stats_db_name}.software_oa on allsoftware.ri_initiative=software_oa.ri_initiative; +drop table ${stats_db_name}.indi_ris_openess purge; + create table if not exists ${stats_db_name}.indi_ris_openess stored as parquet as select allpubsshare.ri_initiative, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when 
s is null then 0 else 1 end) @@ -940,6 +1073,8 @@ DROP TABLE ${stats_db_name}.alldatasetssshare purge; DROP TABLE ${stats_db_name}.allsoftwaresshare purge; --Funder Findability +drop table ${stats_db_name}.indi_funder_findable purge; + create table if not exists ${stats_db_name}.indi_funder_findable stored as parquet as with result_findable as (select p.funder funder, count(distinct rp.id) no_result_findable from ${stats_db_name}.result_projects rp @@ -958,6 +1093,8 @@ from allresults join result_findable on result_findable.funder=allresults.funder; --RIs Findability +drop table ${stats_db_name}.indi_ris_findable purge; + create table if not exists ${stats_db_name}.indi_ris_findable stored as parquet as with result_contexts as (select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc From 17586f0ff8d0e8d6225ecff52fa072ed4e66c3d4 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Mon, 9 Oct 2023 14:21:31 +0300 Subject: [PATCH 43/57] Update step20-createMonitorDB.sql Add result_orcid table to monitor dbs --- .../oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 586bee347..d5d242230 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -162,6 +162,8 @@ create table TARGET.result_fos stored as parquet as select * from SOURCE.result_ create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id); --ANALYZE 
TABLE TARGET.result_accessroute COMPUTE STATISTICS; +create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); + create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; From 9a98f408b36d6ebcd0b1bdeaaa64565c0a899f03 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 10 Oct 2023 09:36:11 +0200 Subject: [PATCH 44/57] code formatting --- .../opencitations/CreateActionSetSparkJob.java | 2 +- .../dnetlib/doiboost/crossref/Crossref2Oaf.scala | 16 +++++++++------- .../doiboost/crossref/CrossrefMappingTest.scala | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index a367ba852..b707fdcd3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -7,7 +7,6 @@ import java.io.IOException; import java.io.Serializable; import java.util.*; -import eu.dnetlib.dhp.schema.oaf.utils.*; import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; @@ -30,6 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; 
+import eu.dnetlib.dhp.schema.oaf.utils.*; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index e0fdb9ce4..565d34e62 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -31,9 +31,7 @@ case class mappingAuthor( affiliation: Option[mappingAffiliation] ) {} -case class funderInfo(id:String,uri:String, name:String,synonym:List[String] ) {} - - +case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {} case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} @@ -41,7 +39,9 @@ case object Crossref2Oaf { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val irishFunder: List[funderInfo] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")).mkString + val s = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")) + .mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: org.json4s.JValue = parse(s) json.extract[List[funderInfo]] @@ -100,9 +100,11 @@ case object Crossref2Oaf { "report" -> "0017 Report" ) - def getIrishId(doi:String):Option[String] = { - val id =doi.split("/").last - irishFunder.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))).map(f => f.id) + def getIrishId(doi: String): Option[String] = { + val id = doi.split("/").last + irishFunder + .find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))) + .map(f => f.id) } def mappingResult(result: Result, 
json: JValue, cobjCategory: String): Result = { diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 7961376c5..fbf6f72c0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -50,7 +50,7 @@ class CrossrefMappingTest { } } - def checkRelation(generatedOAF: List[Oaf]): Unit = { + def checkRelation(generatedOAF: List[Oaf]): Unit = { val rels: List[Relation] = generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] From 110ce4b40fc54c2d60fe8120927e76b84580b8c9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 10 Oct 2023 09:46:40 +0200 Subject: [PATCH 45/57] extend the fos model to include the level4 and the scores for level3 and level4. 
removed bip indicators from the instance --- .../dnetlib/dhp/actionmanager/Constants.java | 20 ++++-- .../GetFOSSparkJob.java | 9 ++- .../PrepareFOSSparkJob.java | 27 +++++-- .../SparkSaveUnresolved.java | 6 +- .../model/FOSDataModel.java | 63 +++++++++++++++-- .../CreateActionSetSparkJob.java | 2 +- .../oozie_app/workflow.xml | 56 +++++++-------- .../createunresolvedentities/GetFosTest.java | 39 +++++++++-- .../createunresolvedentities/PrepareTest.java | 70 +++++++++++++++++++ .../createunresolvedentities/ProduceTest.java | 34 +++++++++ .../createunresolvedentities/fos/fos_sbs2.csv | 26 +++++++ .../fos/fos_sbs_2.json | 25 +++++++ .../doiboost/crossref/Crossref2Oaf.scala | 16 +++-- .../crossref/CrossrefMappingTest.scala | 2 +- 14 files changed, 334 insertions(+), 61 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs2.csv create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java index 62556b16b..006d3af76 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/Constants.java @@ -40,6 +40,7 @@ public class Constants { public static final String SDG_CLASS_NAME = "Sustainable Development Goals"; public static final String NULL = "NULL"; + public static final String NA = "N/A"; public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -61,10 +62,16 @@ public class Constants { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } - public static Subject getSubject(String sbj, String classid, String classname, - String 
diqualifierclassid) { - if (sbj == null || sbj.equals(NULL)) + public static Subject getSubject(String sbj, String classid, String classname, String diqualifierclassid, + Boolean split) { + if (sbj == null || sbj.equals(NULL) || sbj.startsWith(NA)) return null; + String trust = ""; + String subject = sbj; + if (split) { + sbj = subject.split("@@")[0]; + trust = subject.split("@@")[1]; + } Subject s = new Subject(); s.setValue(sbj); s @@ -89,9 +96,14 @@ public class Constants { UPDATE_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), - "")); + trust)); return s; + } + + public static Subject getSubject(String sbj, String classid, String classname, + String diqualifierclassid) { + return getSubject(sbj, classid, classname, diqualifierclassid, false); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java index 0cc2f93df..abea6acd7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSSparkJob.java @@ -75,9 +75,12 @@ public class GetFOSSparkJob implements Serializable { fosData.map((MapFunction) r -> { FOSDataModel fosDataModel = new FOSDataModel(); fosDataModel.setDoi(r.getString(0).toLowerCase()); - fosDataModel.setLevel1(r.getString(1)); - fosDataModel.setLevel2(r.getString(2)); - fosDataModel.setLevel3(r.getString(3)); + fosDataModel.setLevel1(r.getString(2)); + fosDataModel.setLevel2(r.getString(3)); + fosDataModel.setLevel3(r.getString(4)); + fosDataModel.setLevel4(r.getString(5)); + fosDataModel.setScoreL3(String.valueOf(r.getDouble(6))); + fosDataModel.setScoreL4(String.valueOf(r.getDouble(7))); return fosDataModel; }, 
Encoders.bean(FOSDataModel.class)) .write() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 4d2d25215..57ad8b96a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -78,12 +78,20 @@ public class PrepareFOSSparkJob implements Serializable { HashSet level1 = new HashSet<>(); HashSet level2 = new HashSet<>(); HashSet level3 = new HashSet<>(); - addLevels(level1, level2, level3, first); - it.forEachRemaining(v -> addLevels(level1, level2, level3, v)); + HashSet level4 = new HashSet<>(); + addLevels(level1, level2, level3, level4, first); + it.forEachRemaining(v -> addLevels(level1, level2, level3, level4, v)); List sbjs = new ArrayList<>(); - level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); - level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); - level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); + level1 + .forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); + level2 + .forEach(l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); + level3 + .forEach( + l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true))); + level4 + .forEach( + l -> add(sbjs, getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID, true))); r.setSubject(sbjs); r .setDataInfo( @@ -106,11 +114,18 @@ public class PrepareFOSSparkJob implements Serializable { .json(outputPath + 
"/fos"); } + private static void add(List sbsjs, Subject sbj) { + if (sbj != null) + sbsjs.add(sbj); + } + private static void addLevels(HashSet level1, HashSet level2, HashSet level3, + HashSet level4, FOSDataModel first) { level1.add(first.getLevel1()); level2.add(first.getLevel2()); - level3.add(first.getLevel3()); + level3.add(first.getLevel3() + "@@" + first.getScoreL3()); + level4.add(first.getLevel4() + "@@" + first.getScoreL4()); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index 3b9775094..93bbfcc88 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -69,9 +69,9 @@ public class SparkSaveUnresolved implements Serializable { .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = it.next(); it.forEachRemaining(r -> { - if (r.getInstance() != null) { - ret.setInstance(r.getInstance()); - } +// if (r.getInstance() != null) { +// ret.setInstance(r.getInstance()); +// } if (r.getSubject() != null) { if (ret.getSubject() != null) ret.getSubject().addAll(r.getSubject()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java index e98ba74a1..a82d7bfd6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java @@ -11,21 +11,43 @@ public 
class FOSDataModel implements Serializable { private String doi; @CsvBindByPosition(position = 1) +// @CsvBindByName(column = "doi") + private String oaid; + @CsvBindByPosition(position = 2) // @CsvBindByName(column = "level1") private String level1; - @CsvBindByPosition(position = 2) + @CsvBindByPosition(position = 3) // @CsvBindByName(column = "level2") private String level2; - @CsvBindByPosition(position = 3) + @CsvBindByPosition(position = 4) // @CsvBindByName(column = "level3") private String level3; + @CsvBindByPosition(position = 5) +// @CsvBindByName(column = "level3") + private String level4; + @CsvBindByPosition(position = 6) + private String scoreL3; + @CsvBindByPosition(position = 7) + private String scoreL4; + public FOSDataModel() { } + public FOSDataModel(String doi, String level1, String level2, String level3, String level4, String l3score, + String l4score) { + this.doi = doi; + this.level1 = level1; + this.level2 = level2; + this.level3 = level3; + this.level4 = level4; + this.scoreL3 = l3score; + this.scoreL4 = l4score; + } + public FOSDataModel(String doi, String level1, String level2, String level3) { this.doi = doi; this.level1 = level1; @@ -33,8 +55,41 @@ public class FOSDataModel implements Serializable { this.level3 = level3; } - public static FOSDataModel newInstance(String d, String level1, String level2, String level3) { - return new FOSDataModel(d, level1, level2, level3); + public static FOSDataModel newInstance(String d, String level1, String level2, String level3, String level4, + String scorel3, String scorel4) { + return new FOSDataModel(d, level1, level2, level3, level4, scorel3, scorel4); + } + + public String getOaid() { + return oaid; + } + + public void setOaid(String oaid) { + this.oaid = oaid; + } + + public String getLevel4() { + return level4; + } + + public void setLevel4(String level4) { + this.level4 = level4; + } + + public String getScoreL3() { + return scoreL3; + } + + public void setScoreL3(String scoreL3) { + 
this.scoreL3 = scoreL3; + } + + public String getScoreL4() { + return scoreL4; + } + + public void setScoreL4(String scoreL4) { + this.scoreL4 = scoreL4; } public String getDoi() { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index a367ba852..b707fdcd3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -7,7 +7,6 @@ import java.io.IOException; import java.io.Serializable; import java.util.*; -import eu.dnetlib.dhp.schema.oaf.utils.*; import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; @@ -30,6 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.*; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml index c8af64594..a2935a71d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml @@ -6,10 +6,10 @@ the input path of the resources to be extended - - bipScorePath - the path where to find the bipFinder scores - + + + + outputPath the 
path where to store the actionset @@ -77,34 +77,34 @@ - + - - - yarn - cluster - Produces the unresolved from BIP! Finder - eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${bipScorePath} - --outputPath${workingDir}/prepared - - - - + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFosTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFosTest.java index 7e0acc2bb..d4fe129df 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFosTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFosTest.java @@ -13,10 +13,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -68,6 +65,7 @@ public class GetFosTest { } @Test + @Disabled void test3() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs.tsv") @@ -96,4 +94,37 
@@ public class GetFosTest { tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); } + + @Test + void test4() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs2.csv") + .getPath(); + + final String outputPath = workingDir.toString() + "/fos.json"; + GetFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + "--delimiter", ",", + "-outputPath", outputPath + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(outputPath) + .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); + + tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getLevel4() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getScoreL3() != null)); + tmp.foreach(t -> Assertions.assertTrue(t.getScoreL4() != null)); + + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index cc8108bde..ccb0ebbff 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -222,6 +222,76 @@ public class PrepareTest { } + @Test + void fosPrepareTest2() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json") + .getPath(); + + PrepareFOSSparkJob + 
.main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + + "-outputPath", workingDir.toString() + "/work" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/work/fos") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + String doi1 = "unresolved::10.1016/j.revmed.2006.07.012::doi"; + + assertEquals(13, tmp.count()); + assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); + + Result result = tmp + .filter(r -> r.getId().equals(doi1)) + .first(); + + result.getSubject().forEach(s -> System.out.println(s.getValue() + " trust = " + s.getDataInfo().getTrust())); + Assertions.assertEquals(6, result.getSubject().size()); + + assertTrue( + result + .getSubject() + .stream() + .anyMatch( + s -> s.getValue().contains("03 medical and health sciences") + && s.getDataInfo().getTrust().equals(""))); + + assertTrue( + result + .getSubject() + .stream() + .anyMatch( + s -> s.getValue().contains("0302 clinical medicine") && s.getDataInfo().getTrust().equals(""))); + + assertTrue( + result + .getSubject() + .stream() + .anyMatch( + s -> s + .getValue() + .contains("030204 cardiovascular system & hematology") + && s.getDataInfo().getTrust().equals("0.5101401805877686"))); + assertTrue( + result + .getSubject() + .stream() + .anyMatch( + s -> s + .getValue() + .contains("03020409 Hematology/Coagulopathies") + && s.getDataInfo().getTrust().equals("0.0546871414174914"))); + + } + @Test void sdgPrepareTest() throws Exception { final String sourcePath = getClass() diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index c3c110f09..fce6c1e97 100644 --- 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -379,6 +379,40 @@ public class ProduceTest { .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); } + @Test + public JavaRDD getResultFosJavaRDD() throws Exception { + + final String fosPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json") + .getPath(); + + PrepareFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", fosPath, + "-outputPath", workingDir.toString() + "/work" + }); + + SparkSaveUnresolved.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", workingDir.toString() + "/work", + + "-outputPath", workingDir.toString() + "/unresolved" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/unresolved") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + return tmp; + + } + @Test void prepareTest5Subjects() throws Exception { final String doi = "unresolved::10.1063/5.0032658::doi"; diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs2.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs2.csv new file mode 100644 index 000000000..3b1f2304f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs2.csv @@ -0,0 +1,26 @@ +DOI,OAID,level1,level2,level3,level4,score_for_L3,score_for_L4 
+10.1016/j.anucene.2006.02.004,doi_________::00059d9963edf633bec756fb21b5bd72,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020209 energy,02020908 Climate change policy/Ethanol fuel,0.5,0.5 +10.1016/j.anucene.2006.02.004,doi_________::00059d9963edf633bec756fb21b5bd72,02 engineering and technology,0211 other engineering and technologies,021108 energy,02110808 Climate change policy/Ethanol fuel,0.5,0.5 +10.1016/j.revmed.2006.07.010,doi_________::0026476c1651a92c933d752ff12496c7,03 medical and health sciences,0302 clinical medicine,030220 oncology & carcinogenesis,N/A,0.5036656856536865,0.0 +10.1016/j.revmed.2006.07.010,doi_________::0026476c1651a92c933d752ff12496c7,03 medical and health sciences,0302 clinical medicine,030212 general & internal medicine,N/A,0.4963343143463135,0.0 +10.20965/jrm.2006.p0312,doi_________::0028336a2f3826cc83c47dbefac71543,02 engineering and technology,0209 industrial biotechnology,020901 industrial engineering & automation,02090104 Robotics/Robots,0.6111094951629639,0.5053805979936855 +10.20965/jrm.2006.p0312,doi_________::0028336a2f3826cc83c47dbefac71543,01 natural sciences,0104 chemical sciences,010401 analytical chemistry,N/A,0.3888905048370361,0.0 +10.1111/j.1747-7379.2006.040_1.x,doi_________::002c7077e7c114a8304eb90f59e45fa4,05 social sciences,0506 political science,050602 political science & public administration,05060202 Ethnic groups/Ethnicity,0.6159052848815918,0.7369035568037298 +10.1111/j.1747-7379.2006.040_1.x,doi_________::002c7077e7c114a8304eb90f59e45fa4,05 social sciences,0502 economics and business,050207 economics,N/A,0.3840946555137634,0.0 +10.1007/s10512-006-0049-9,doi_________::003f29f9254819cf4c78558b1bc25f10,02 engineering and technology,"0202 electrical engineering, electronic engineering, information engineering",020209 energy,02020908 Climate change policy/Ethanol fuel,0.5,0.5 +10.1007/s10512-006-0049-9,doi_________::003f29f9254819cf4c78558b1bc25f10,02 
engineering and technology,0211 other engineering and technologies,021108 energy,02110808 Climate change policy/Ethanol fuel,0.5,0.5 +10.1111/j.1365-2621.2005.01045.x,doi_________::00419355b4c3e0646bd0e1b301164c8e,04 agricultural and veterinary sciences,0404 agricultural biotechnology,040401 food science,04040102 Food science/Food industry,0.5,0.5 +10.1111/j.1365-2621.2005.01045.x,doi_________::00419355b4c3e0646bd0e1b301164c8e,04 agricultural and veterinary sciences,0405 other agricultural sciences,040502 food science,04050202 Food science/Food industry,0.5,0.5 +10.1002/chin.200617262,doi_________::004c8cef80668904961b9e62841793c8,01 natural sciences,0104 chemical sciences,010405 organic chemistry,01040508 Functional groups/Ethers,0.5566747188568115,0.5582916736602783 +10.1002/chin.200617262,doi_________::004c8cef80668904961b9e62841793c8,01 natural sciences,0104 chemical sciences,010402 general chemistry,01040207 Chemical synthesis/Total synthesis,0.4433253407478332,0.4417082965373993 +10.1016/j.revmed.2006.07.012,doi_________::005b1d0fb650b680abaf6cfe26a21604,03 medical and health sciences,0302 clinical medicine,030204 cardiovascular system & hematology,03020409 Hematology/Coagulopathies,0.5101401805877686,0.0546871414174914 +10.1016/j.revmed.2006.07.012,doi_________::005b1d0fb650b680abaf6cfe26a21604,03 medical and health sciences,0301 basic medicine,030105 genetics & heredity,N/A,0.4898599088191986,0.0 +10.4109/jslab.17.132,doi_________::00889baa06de363e37930daaf8e800c0,03 medical and health sciences,0301 basic medicine,030104 developmental biology,N/A,0.5,0.0 +10.4109/jslab.17.132,doi_________::00889baa06de363e37930daaf8e800c0,03 medical and health sciences,0303 health sciences,030304 developmental biology,N/A,0.5,0.0 +10.1108/00251740610715687,doi_________::0092cb1b1920d556719385a26363ecaa,05 social sciences,0502 economics and business,050203 business & management,05020311 International business/International trade,0.605047881603241,0.2156608108845153 
+10.1108/00251740610715687,doi_________::0092cb1b1920d556719385a26363ecaa,05 social sciences,0502 economics and business,050211 marketing,N/A,0.394952118396759,0.0 +10.1080/03067310500248098,doi_________::00a76678d230e3f20b6356804448028f,04 agricultural and veterinary sciences,0404 agricultural biotechnology,040401 food science,04040102 Food science/Food industry,0.5,0.5 +10.1080/03067310500248098,doi_________::00a76678d230e3f20b6356804448028f,04 agricultural and veterinary sciences,0405 other agricultural sciences,040502 food science,04050202 Food science/Food industry,0.5,0.5 +10.3152/147154306781778533,doi_________::00acc520f3939e5a6675343881fed4f2,05 social sciences,0502 economics and business,050203 business & management,05020307 Innovation/Product management,0.5293408632278442,0.5326762795448303 +10.3152/147154306781778533,doi_________::00acc520f3939e5a6675343881fed4f2,05 social sciences,0509 other social sciences,050905 science studies,05090502 Social philosophy/Capitalism,0.4706590473651886,0.4673237204551697 +10.1785/0120050806,doi_________::00d5831d329e7ae4523d78bfc3042e98,02 engineering and technology,0211 other engineering and technologies,021101 geological & geomatics engineering,02110103 Concrete/Building materials,0.5343400835990906,0.3285667930180677 \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json new file mode 100644 index 000000000..00ffad70c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos_sbs_2.json @@ -0,0 +1,25 @@ +{"doi":"10.1016/j.anucene.2006.02.004","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020209 energy","level4":"02020908 
Climate change policy/Ethanol fuel","scoreL3":"0.5","scoreL4":"0.5"} +{"doi":"10.1016/j.anucene.2006.02.004","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021108 energy","level4":"02110808 Climate change policy/Ethanol fuel","scoreL3":"0.5","scoreL4":"0.5"} +{"doi":"10.1016/j.revmed.2006.07.010","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030220 oncology & carcinogenesis","level4":"N/A","scoreL3":"0.5036656856536865","scoreL4":"0.0"} +{"doi":"10.1016/j.revmed.2006.07.010","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030212 general & internal medicine","level4":"N/A","scoreL3":"0.4963343143463135","scoreL4":"0.0"} +{"doi":"10.20965/jrm.2006.p0312","level1":"02 engineering and technology","level2":"0209 industrial biotechnology","level3":"020901 industrial engineering & automation","level4":"02090104 Robotics/Robots","scoreL3":"0.6111094951629639","scoreL4":"0.5053805979936855"} +{"doi":"10.20965/jrm.2006.p0312","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010401 analytical chemistry","level4":"N/A","scoreL3":"0.3888905048370361","scoreL4":"0.0"} +{"doi":"10.1111/j.1747-7379.2006.040_1.x","level1":"05 social sciences","level2":"0506 political science","level3":"050602 political science & public administration","level4":"05060202 Ethnic groups/Ethnicity","scoreL3":"0.6159052848815918","scoreL4":"0.7369035568037298"} +{"doi":"10.1111/j.1747-7379.2006.040_1.x","level1":"05 social sciences","level2":"0502 economics and business","level3":"050207 economics","level4":"N/A","scoreL3":"0.3840946555137634","scoreL4":"0.0"} +{"doi":"10.1007/s10512-006-0049-9","level1":"02 engineering and technology","level2":"0202 electrical engineering, electronic engineering, information engineering","level3":"020209 energy","level4":"02020908 Climate change policy/Ethanol fuel","scoreL3":"0.5","scoreL4":"0.5"} 
+{"doi":"10.1007/s10512-006-0049-9","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021108 energy","level4":"02110808 Climate change policy/Ethanol fuel","scoreL3":"0.5","scoreL4":"0.5"} +{"doi":"10.1111/j.1365-2621.2005.01045.x","level1":"04 agricultural and veterinary sciences","level2":"0404 agricultural biotechnology","level3":"040401 food science","level4":"04040102 Food science/Food industry","scoreL3":"0.5","scoreL4":"0.5"} +{"doi":"10.1111/j.1365-2621.2005.01045.x","level1":"04 agricultural and veterinary sciences","level2":"0405 other agricultural sciences","level3":"040502 food science","level4":"04050202 Food science/Food industry","scoreL3":"0.5","scoreL4":"0.5"} +{"doi":"10.1002/chin.200617262","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010405 organic chemistry","level4":"01040508 Functional groups/Ethers","scoreL3":"0.5566747188568115","scoreL4":"0.5582916736602783"} +{"doi":"10.1002/chin.200617262","level1":"01 natural sciences","level2":"0104 chemical sciences","level3":"010402 general chemistry","level4":"01040207 Chemical synthesis/Total synthesis","scoreL3":"0.4433253407478332","scoreL4":"0.4417082965373993"} +{"doi":"10.1016/j.revmed.2006.07.012","level1":"03 medical and health sciences","level2":"0302 clinical medicine","level3":"030204 cardiovascular system & hematology","level4":"03020409 Hematology/Coagulopathies","scoreL3":"0.5101401805877686","scoreL4":"0.0546871414174914"} +{"doi":"10.1016/j.revmed.2006.07.012","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030105 genetics & heredity","level4":"N/A","scoreL3":"0.4898599088191986","scoreL4":"0.0"} +{"doi":"10.4109/jslab.17.132","level1":"03 medical and health sciences","level2":"0301 basic medicine","level3":"030104 developmental biology","level4":"N/A","scoreL3":"0.5","scoreL4":"0.0"} +{"doi":"10.4109/jslab.17.132","level1":"03 medical and health 
sciences","level2":"0303 health sciences","level3":"030304 developmental biology","level4":"N/A","scoreL3":"0.5","scoreL4":"0.0"} +{"doi":"10.1108/00251740610715687","level1":"05 social sciences","level2":"0502 economics and business","level3":"050203 business & management","level4":"05020311 International business/International trade","scoreL3":"0.605047881603241","scoreL4":"0.2156608108845153"} +{"doi":"10.1108/00251740610715687","level1":"05 social sciences","level2":"0502 economics and business","level3":"050211 marketing","level4":"N/A","scoreL3":"0.394952118396759","scoreL4":"0.0"} +{"doi":"10.1080/03067310500248098","level1":"04 agricultural and veterinary sciences","level2":"0404 agricultural biotechnology","level3":"040401 food science","level4":"04040102 Food science/Food industry","scoreL3":"0.5","scoreL4":"0.5"} +{"doi":"10.1080/03067310500248098","level1":"04 agricultural and veterinary sciences","level2":"0405 other agricultural sciences","level3":"040502 food science","level4":"04050202 Food science/Food industry","scoreL3":"0.5","scoreL4":"0.5"} +{"doi":"10.3152/147154306781778533","level1":"05 social sciences","level2":"0502 economics and business","level3":"050203 business & management","level4":"05020307 Innovation/Product management","scoreL3":"0.5293408632278442","scoreL4":"0.5326762795448303"} +{"doi":"10.3152/147154306781778533","level1":"05 social sciences","level2":"0509 other social sciences","level3":"050905 science studies","level4":"05090502 Social philosophy/Capitalism","scoreL3":"0.4706590473651886","scoreL4":"0.4673237204551697"} +{"doi":"10.1785/0120050806","level1":"02 engineering and technology","level2":"0211 other engineering and technologies","level3":"021101 geological & geomatics engineering","level4":"02110103 Concrete/Building materials","scoreL3":"0.5343400835990906","scoreL4":"0.3285667930180677"} diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala 
b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index e0fdb9ce4..565d34e62 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -31,9 +31,7 @@ case class mappingAuthor( affiliation: Option[mappingAffiliation] ) {} -case class funderInfo(id:String,uri:String, name:String,synonym:List[String] ) {} - - +case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {} case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} @@ -41,7 +39,9 @@ case object Crossref2Oaf { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val irishFunder: List[funderInfo] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")).mkString + val s = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")) + .mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: org.json4s.JValue = parse(s) json.extract[List[funderInfo]] @@ -100,9 +100,11 @@ case object Crossref2Oaf { "report" -> "0017 Report" ) - def getIrishId(doi:String):Option[String] = { - val id =doi.split("/").last - irishFunder.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))).map(f => f.id) + def getIrishId(doi: String): Option[String] = { + val id = doi.split("/").last + irishFunder + .find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))) + .map(f => f.id) } def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala 
b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 7961376c5..fbf6f72c0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -50,7 +50,7 @@ class CrossrefMappingTest { } } - def checkRelation(generatedOAF: List[Oaf]): Unit = { + def checkRelation(generatedOAF: List[Oaf]): Unit = { val rels: List[Relation] = generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] From ed9282ef2a3e40a76308fbff9226a3bfa6a90df6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 10 Oct 2023 09:52:03 +0200 Subject: [PATCH 46/57] removed module dhp-stats-monitor-update --- .../oozie_app/config-default.xml | 30 ---- .../oozie_app/copyDataToImpalaCluster.sh | 75 --------- .../oozie_app/finalizeImpalaCluster.sh | 29 ---- .../graph/stats-monitor/oozie_app/monitor.sh | 54 ------- .../oozie_app/scripts/updateMonitorDB.sql | 138 ---------------- .../oozie_app/scripts/updateMonitorDBAll.sql | 150 ------------------ .../scripts/updateMonitorDB_institutions.sql | 12 -- .../stats-monitor/oozie_app/workflow.xml | 110 ------------- 8 files changed, 598 deletions(-) delete mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh delete mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh delete mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh delete mode 100644 
dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql delete mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql delete mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql delete mode 100644 dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml deleted file mode 100644 index b2a1322e6..000000000 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - ${jobTracker} - - - nameNode - ${nameNode} - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hive_jdbc_url - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=22166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=15596411699;spark.yarn.driver.memoryOverhead=1228 - - - oozie.wf.workflow.notification.url - {serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status - - \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh deleted file mode 100644 index 
1587f7152..000000000 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/copyDataToImpalaCluster.sh +++ /dev/null @@ -1,75 +0,0 @@ -export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs -export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) -if ! [ -L $link_folder ] -then - rm -Rf "$link_folder" - ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} -fi - -#export HADOOP_USER_NAME=$2 - -function copydb() { - - export HADOOP_USER="dimitris.pierrakos" - export HADOOP_USER_NAME='dimitris.pierrakos' - - db=$1 - FILE=("hive_wf_tmp_"$RANDOM) - hdfs dfs -mkdir hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ - - # change ownership to impala -# hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db - hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/ - - - # copy the databases from ocean to impala - echo "copying $db" - hadoop distcp -Dmapreduce.map.memory.mb=6144 -pb hdfs://nameservice1/user/hive/warehouse/${db}.db hdfs://impala-cluster-mn1.openaire.eu:8020/tmp/$FILE/ - - hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -chmod -R 777 /tmp/$FILE/${db}.db - - # drop tables from db - for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} --delimited -q "show tables"`; - do - `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop table $i;"`; - done - - # drop views from db - for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} --delimited -q "show tables"`; - do - `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "drop view $i;"`; - done - - # delete the database - impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database if exists ${db} cascade"; - - # create the databases - impala-shell -i impala-cluster-dn1.openaire.eu -q "create database ${db}"; - - impala-shell -q "INVALIDATE METADATA" - echo "creating schema for ${db}" - for (( k = 0; k < 5; k ++ )); do - for i in `impala-shell -d ${db} 
--delimited -q "show tables"`; - do - impala-shell -d ${db} --delimited -q "show create table $i"; - done | sed 's/"$/;/' | sed 's/^"//' | sed 's/[[:space:]]\date[[:space:]]/`date`/g' | impala-shell --user $HADOOP_USER_NAME -i impala-cluster-dn1.openaire.eu -c -f - - done - - # load the data from /tmp in the respective tables - echo "copying data in tables and computing stats" - for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} --delimited -q "show tables"`; - do - impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "load data inpath '/tmp/$FILE/${db}.db/$i' into table $i"; - impala-shell -i impala-cluster-dn1.openaire.eu -d ${db} -q "compute stats $i"; - done - - # deleting the remaining directory from hdfs -hdfs dfs -conf /etc/impala_cluster/hdfs-site.xml -rm -R /tmp/$FILE/${db}.db -} - -MONITOR_DB=$1 -#HADOOP_USER_NAME=$2 - -copydb $MONITOR_DB'_institutions' -copydb $MONITOR_DB - diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh deleted file mode 100644 index a7227e0c8..000000000 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/finalizeImpalaCluster.sh +++ /dev/null @@ -1,29 +0,0 @@ -export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs -export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) -if ! 
[ -L $link_folder ] -then - rm -Rf "$link_folder" - ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} -fi - -function createShadowDB() { - SOURCE=$1 - SHADOW=$2 - - # drop views from db - for i in `impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} --delimited -q "show tables"`; - do - `impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "drop view $i;"`; - done - - impala-shell -i impala-cluster-dn1.openaire.eu -q "drop database ${SHADOW} CASCADE"; - impala-shell -i impala-cluster-dn1.openaire.eu -q "create database if not exists ${SHADOW}"; -# impala-shell -i impala-cluster-dn1.openaire.eu -d ${SHADOW} -q "show tables" | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f - - impala-shell -i impala-cluster-dn1.openaire.eu -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -i impala-cluster-dn1.openaire.eu -f - -} - -MONITOR_DB=$1 -MONITOR_DB_SHADOW=$2 - -createShadowDB $MONITOR_DB'_institutions' $MONITOR_DB'_institutions_shadow' -createShadowDB $MONITOR_DB $MONITOR_DB'_shadow' diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh deleted file mode 100644 index 4f1889c9e..000000000 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/monitor.sh +++ /dev/null @@ -1,54 +0,0 @@ -export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs -export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) -if ! 
[ -L $link_folder ] -then - rm -Rf "$link_folder" - ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} -fi - -export SOURCE=$1 -export TARGET=$2 -export SHADOW=$3 -export SCRIPT_PATH=$4 -export SCRIPT_PATH2=$5 -export SCRIPT_PATH2=$6 - -export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" -export HADOOP_USER_NAME="oozie" - -echo "Getting file from " $4 -hdfs dfs -copyToLocal $4 - -echo "Getting file from " $5 -hdfs dfs -copyToLocal $5 - -echo "Getting file from " $6 -hdfs dfs -copyToLocal $6 - -#update Institutions DB -cat updateMonitorDB_institutions.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo -hive $HIVE_OPTS -f foo -cat updateMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo -hive $HIVE_OPTS -f foo - -echo "Hive shell finished" - -echo "Updating shadow monitor insitutions database" -hive -e "drop database if exists ${SHADOW}_institutions cascade" -hive -e "create database if not exists ${SHADOW}_institutions" -hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo -hive -f foo -echo "Shadow db monitor insitutions ready!" 
- -#update Monitor DB -cat updateMonitorDBAll.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2/g1" > foo -hive $HIVE_OPTS -f foo - -echo "Hive shell finished" - -echo "Updating shadow monitor database" -hive -e "drop database if exists ${SHADOW} cascade" -hive -e "create database if not exists ${SHADOW}" -hive $HIVE_OPTS --database ${2} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${2}.\1;/" > foo -hive -f foo -echo "Shadow db monitor insitutions ready!" diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql deleted file mode 100644 index 248b7e564..000000000 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB.sql +++ /dev/null @@ -1,138 +0,0 @@ -INSERT INTO TARGET.result select * from TARGET.result_new; -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; - -INSERT INTO TARGET.result_citations select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; - -INSERT INTO TARGET.result_references_oc select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS; - -INSERT INTO TARGET.result_classifications select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_classifications COMPUTE STATISTICS; - -INSERT INTO TARGET.result_apc select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; - -INSERT INTO 
TARGET.result_concepts select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; - -INSERT INTO TARGET.result_datasources select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS; - -INSERT INTO TARGET.result_fundercount select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; - -INSERT INTO TARGET.result_gold select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; - -INSERT INTO TARGET.result_greenoa select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; - -INSERT INTO TARGET.result_languages select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; - -INSERT INTO TARGET.result_licenses select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; - -INSERT INTO TARGET.result_oids select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS; - -INSERT INTO TARGET.result_organization select * from SOURCE.result_organization orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; - -INSERT INTO TARGET.result_peerreviewed select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result_new r where 
r.id=orig.id); -ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; - -INSERT INTO TARGET.result_pids select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; - -INSERT INTO TARGET.result_projectcount select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; - -INSERT INTO TARGET.result_projects select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; - -INSERT INTO TARGET.result_refereed select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; - -INSERT INTO TARGET.result_sources select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS; - -INSERT INTO TARGET.result_topics select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; - -INSERT INTO TARGET.result_fos select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; - -INSERT INTO TARGET.result_accessroute select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; - -create or replace view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result_new); -create or replace view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result_new); -insert into 
TARGET.result_result select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; -drop view TARGET.foo1; -drop view TARGET.foo2; -ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; - - --- indicators --- Sprint 1 ---- -INSERT INTO TARGET.indi_pub_green_oa select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_grey_lit select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_doi_from_crossref select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; --- Sprint 2 ---- -INSERT INTO TARGET.indi_result_has_cc_licence select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; -INSERT INTO TARGET.indi_result_has_cc_licence_url select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_has_abstract select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; -INSERT INTO TARGET.indi_result_with_orcid select * from SOURCE.indi_result_with_orcid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; ----- Sprint 3 ---- -INSERT INTO TARGET.indi_funded_result_with_fundref select * from SOURCE.indi_funded_result_with_fundref 
orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; - ----- Sprint 4 ---- -INSERT INTO TARGET.indi_pub_diamond select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_in_transformative select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_closed_other_open select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; ----- Sprint 5 ---- -INSERT INTO TARGET.indi_result_no_of_copies select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; ----- Sprint 6 ---- -INSERT INTO TARGET.indi_pub_hybrid_oa_with_cc select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_bronze_oa select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_downloads select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_downloads_datasource select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); -ANALYZE TABLE 
TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_downloads_year select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_downloads_datasource_year select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; ----- Sprint 7 ---- -INSERT INTO TARGET.indi_pub_gold_oa select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_hybrid select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_has_preprint select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_in_subscribed select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; -INSERT INTO TARGET.indi_result_with_pid select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; -INSERT INTO TARGET.indi_impact_measures select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_interdisciplinarity select * from SOURCE.indi_pub_interdisciplinarity orig where 
exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; - -DROP TABLE IF EXISTS TARGET.result_new; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql deleted file mode 100644 index 478e3824e..000000000 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDBAll.sql +++ /dev/null @@ -1,150 +0,0 @@ -DROP TABLE IF EXISTS TARGET.result_new; - -create table TARGET.result_new as - select distinct * from ( - select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( - 'openorgs____::4d4051b56708688235252f1d8fddb8c1', --Iscte - Instituto Universitário de Lisboa - 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab' -- Universidade Católica Portuguesa - ) )) foo; - -INSERT INTO TARGET.result select * from TARGET.result_new; -ANALYZE TABLE TARGET.result_new COMPUTE STATISTICS; - -INSERT INTO TARGET.result select * from TARGET.result_new; -ANALYZE TABLE TARGET.result COMPUTE STATISTICS; - -INSERT INTO TARGET.result_citations select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; - -INSERT INTO TARGET.result_references_oc select * from SOURCE.result_references_oc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_references_oc COMPUTE STATISTICS; - -INSERT INTO TARGET.result_classifications select * from SOURCE.result_classifications orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE 
TARGET.result_classifications COMPUTE STATISTICS; - -INSERT INTO TARGET.result_apc select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_apc COMPUTE STATISTICS; - -INSERT INTO TARGET.result_concepts select * from SOURCE.result_concepts orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_concepts COMPUTE STATISTICS; - -INSERT INTO TARGET.result_datasources select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_datasources COMPUTE STATISTICS; - -INSERT INTO TARGET.result_fundercount select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_fundercount COMPUTE STATISTICS; - -INSERT INTO TARGET.result_gold select * from SOURCE.result_gold orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_gold COMPUTE STATISTICS; - -INSERT INTO TARGET.result_greenoa select * from SOURCE.result_greenoa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_greenoa COMPUTE STATISTICS; - -INSERT INTO TARGET.result_languages select * from SOURCE.result_languages orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_languages COMPUTE STATISTICS; - -INSERT INTO TARGET.result_licenses select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_licenses COMPUTE STATISTICS; - -INSERT INTO TARGET.result_oids select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_oids COMPUTE STATISTICS; - -INSERT INTO TARGET.result_organization select * from SOURCE.result_organization orig where exists 
(select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_organization COMPUTE STATISTICS; - -INSERT INTO TARGET.result_peerreviewed select * from SOURCE.result_peerreviewed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_peerreviewed COMPUTE STATISTICS; - -INSERT INTO TARGET.result_pids select * from SOURCE.result_pids orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_pids COMPUTE STATISTICS; - -INSERT INTO TARGET.result_projectcount select * from SOURCE.result_projectcount orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_projectcount COMPUTE STATISTICS; - -INSERT INTO TARGET.result_projects select * from SOURCE.result_projects orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_projects COMPUTE STATISTICS; - -INSERT INTO TARGET.result_refereed select * from SOURCE.result_refereed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_refereed COMPUTE STATISTICS; - -INSERT INTO TARGET.result_sources select * from SOURCE.result_sources orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_sources COMPUTE STATISTICS; - -INSERT INTO TARGET.result_topics select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS; - -INSERT INTO TARGET.result_fos select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS; - -INSERT INTO TARGET.result_accessroute select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS; - -create or replace 
view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result_new); -create or replace view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result_new); -insert into TARGET.result_result select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; -drop view TARGET.foo1; -drop view TARGET.foo2; -ANALYZE TABLE TARGET.result_result COMPUTE STATISTICS; - - --- indicators --- Sprint 1 ---- -INSERT INTO TARGET.indi_pub_green_oa select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_green_oa COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_grey_lit select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_grey_lit COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_doi_from_crossref select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_doi_from_crossref COMPUTE STATISTICS; --- Sprint 2 ---- -INSERT INTO TARGET.indi_result_has_cc_licence select * from SOURCE.indi_result_has_cc_licence orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_has_cc_licence COMPUTE STATISTICS; -INSERT INTO TARGET.indi_result_has_cc_licence_url select * from SOURCE.indi_result_has_cc_licence_url orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_has_cc_licence_url COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_has_abstract select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_has_abstract COMPUTE STATISTICS; -INSERT INTO TARGET.indi_result_with_orcid select * from SOURCE.indi_result_with_orcid orig 
where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_with_orcid COMPUTE STATISTICS; ----- Sprint 3 ---- -INSERT INTO TARGET.indi_funded_result_with_fundref select * from SOURCE.indi_funded_result_with_fundref orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_funded_result_with_fundref COMPUTE STATISTICS; - ----- Sprint 4 ---- -INSERT INTO TARGET.indi_pub_diamond select * from SOURCE.indi_pub_diamond orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_diamond COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_in_transformative select * from SOURCE.indi_pub_in_transformative orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_in_transformative COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_closed_other_open select * from SOURCE.indi_pub_closed_other_open orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_closed_other_open COMPUTE STATISTICS; ----- Sprint 5 ---- -INSERT INTO TARGET.indi_result_no_of_copies select * from SOURCE.indi_result_no_of_copies orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS; ----- Sprint 6 ---- -INSERT INTO TARGET.indi_pub_hybrid_oa_with_cc select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_bronze_oa select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_downloads select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); 
-ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_downloads_datasource select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_datasource COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_downloads_year select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_year COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_downloads_datasource_year select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result_new r where r.id=orig.result_id); -ANALYZE TABLE TARGET.indi_pub_downloads_datasource_year COMPUTE STATISTICS; ----- Sprint 7 ---- -INSERT INTO TARGET.indi_pub_gold_oa select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_gold_oa COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_hybrid select * from SOURCE.indi_pub_hybrid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_hybrid COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_has_preprint select * from SOURCE.indi_pub_has_preprint orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_has_preprint COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_in_subscribed select * from SOURCE.indi_pub_in_subscribed orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS; -INSERT INTO TARGET.indi_result_with_pid select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; -INSERT INTO TARGET.indi_impact_measures select * from 
SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; -INSERT INTO TARGET.indi_pub_interdisciplinarity select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result_new r where r.id=orig.id); -ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS; - -DROP TABLE IF EXISTS TARGET.result_new; diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql deleted file mode 100644 index 236f3733f..000000000 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/scripts/updateMonitorDB_institutions.sql +++ /dev/null @@ -1,12 +0,0 @@ -DROP TABLE IF EXISTS TARGET.result_new; - -create table TARGET.result_new as - select distinct * from ( - select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in ( - 'openorgs____::4d4051b56708688235252f1d8fddb8c1', --Iscte - Instituto Universitário de Lisboa - 'openorgs____::ab4ac74c35fa5dada770cf08e5110fab' -- Universidade Católica Portuguesa - ) )) foo; - -INSERT INTO TARGET.result select * from TARGET.result_new; -ANALYZE TABLE TARGET.result_new COMPUTE STATISTICS; - diff --git a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml deleted file mode 100644 index 7b999a843..000000000 --- a/dhp-workflows/dhp-stats-monitor-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor/oozie_app/workflow.xml +++ /dev/null @@ -1,110 
+0,0 @@ - - - - stats_db_name - the target stats database name - - - monitor_db_name - the target monitor db name - - - monitor_db_shadow_name - the name of the shadow monitor db - - - hive_metastore_uris - hive server metastore URIs - - - hive_jdbc_url - hive server jdbc url - - - hive_timeout - the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a hearbeat. The default value is 300 seconds. - - - hadoop_user_name - user name of the wf owner - - - - - ${jobTracker} - ${nameNode} - - - hive.metastore.uris - ${hive_metastore_uris} - - - hive.txn.timeout - ${hive_timeout} - - - mapred.job.queue.name - analytics - - - - - - - - ${wf:conf('resumeFrom') eq 'Step1-updateMonitorDB'} - ${wf:conf('resumeFrom') eq 'Step2-copyDataToImpalaCluster'} - ${wf:conf('resumeFrom') eq 'Step3-finalizeImpalaCluster'} - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${jobTracker} - ${nameNode} - monitor.sh - ${stats_db_name} - ${monitor_db_name} - ${monitor_db_shadow_name} - ${wf:appPath()}/scripts/updateMonitorDB_institutions.sql - ${wf:appPath()}/scripts/updateMonitorDB.sql - ${wf:appPath()}/scripts/updateMonitorDBAll.sql - monitor.sh - - - - - - - - ${jobTracker} - ${nameNode} - copyDataToImpalaCluster.sh - ${monitor_db_name} - ${hadoop_user_name} - copyDataToImpalaCluster.sh - - - - - - - - ${jobTracker} - ${nameNode} - finalizeImpalaCluster.sh - ${monitor_db_name} - ${monitor_db_shadow_name} - finalizeImpalaCluster.sh - - - - - - - From a431b04814dc55c56ab2bde7d2a2663b7fc0950a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 10 Oct 2023 12:53:57 +0200 Subject: [PATCH 47/57] leftover for the properties and removal of bipfinder --- .../PrepareBipFinder.java | 178 ------------------ .../oozie_app/workflow.xml | 31 +-- .../createunresolvedentities/PrepareTest.java | 139 -------------- .../createunresolvedentities/ProduceTest.java | 30 --- 4 files changed, 1 insertion(+), 377 deletions(-) 
delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java deleted file mode 100644 index 0507f90e5..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ /dev/null @@ -1,178 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities; - -import static eu.dnetlib.dhp.actionmanager.Constants.*; -import static eu.dnetlib.dhp.actionmanager.Constants.UPDATE_CLASS_NAME; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore; -import eu.dnetlib.dhp.actionmanager.bipmodel.score.deserializers.BipResultModel; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Measure; -import eu.dnetlib.dhp.schema.oaf.Result; -import 
eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; -import eu.dnetlib.dhp.utils.DHPUtils; - -public class PrepareBipFinder implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - public static void main(String[] args) throws Exception { - - String jsonConfiguration = IOUtils - .toString( - PrepareBipFinder.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String sourcePath = parser.get("sourcePath"); - log.info("sourcePath {}: ", sourcePath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); - prepareResults(spark, sourcePath, outputPath); - }); - } - - private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD bipDeserializeJavaRDD = sc - .textFile(inputPath) - .map(item -> OBJECT_MAPPER.readValue(item, BipResultModel.class)); - - spark - .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { - BipScore bs = new BipScore(); - bs.setId(key); - bs.setScoreList(entry.get(key)); - - return bs; - }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) 
- .map((MapFunction) v -> { - Result r = new Result(); - final String cleanedPid = CleaningFunctions.normalizePidValue(DOI, v.getId()); - - r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI)); - Instance inst = new Instance(); - inst.setMeasures(getMeasure(v)); - - inst - .setPid( - Arrays - .asList( - OafMapperUtils - .structuredProperty( - cleanedPid, - OafMapperUtils - .qualifier( - DOI, DOI_CLASSNAME, - ModelConstants.DNET_PID_TYPES, - ModelConstants.DNET_PID_TYPES), - null))); - r.setInstance(Arrays.asList(inst)); - r - .setDataInfo( - OafMapperUtils - .dataInfo( - false, null, true, - false, - OafMapperUtils - .qualifier( - ModelConstants.PROVENANCE_ENRICH, - null, - ModelConstants.DNET_PROVENANCE_ACTIONS, - ModelConstants.DNET_PROVENANCE_ACTIONS), - null)); - return r; - }, Encoders.bean(Result.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/bip"); - } - - private static List getMeasure(BipScore value) { - return value - .getScoreList() - .stream() - .map(score -> { - Measure m = new Measure(); - m.setId(score.getId()); - m - .setUnit( - score - .getUnit() - .stream() - .map(unit -> { - KeyValue kv = new KeyValue(); - kv.setValue(unit.getValue()); - kv.setKey(unit.getKey()); - kv - .setDataInfo( - OafMapperUtils - .dataInfo( - false, - UPDATE_DATA_INFO_TYPE, - true, - false, - OafMapperUtils - .qualifier( - UPDATE_MEASURE_BIP_CLASS_ID, - UPDATE_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, - ModelConstants.DNET_PROVENANCE_ACTIONS), - "")); - return kv; - }) - .collect(Collectors.toList())); - return m; - }) - .collect(Collectors.toList()); - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml index a2935a71d..a5388f28b 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml @@ -5,11 +5,6 @@ fosPath the input path of the resources to be extended - - - - - outputPath the path where to store the actionset @@ -77,35 +72,10 @@ - - - - - - - - - - - - - - - - - - - - - - - - - yarn @@ -125,6 +95,7 @@ --sourcePath${fosPath} --outputPath${workingDir}/input/fos + --delimiter${delimiter} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index ccb0ebbff..da7bcd3de 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -67,92 +67,6 @@ public class PrepareTest { spark.stop(); } - @Test - void bipPrepareTest() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") - .getPath(); - - PrepareBipFinder - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - "--outputPath", workingDir.toString() + "/work" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/work/bip") - .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - - Assertions.assertEquals(86, tmp.count()); - - String doi1 = "unresolved::10.0000/096020199389707::doi"; - - Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); - Assertions.assertEquals(1, tmp.filter(r -> 
r.getId().equals(doi1)).collect().get(0).getInstance().size()); - Assertions - .assertEquals( - 3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().get(0).getMeasures().size()); - Assertions - .assertEquals( - "6.34596412687e-09", tmp - .filter(r -> r.getId().equals(doi1)) - .collect() - .get(0) - .getInstance() - .get(0) - .getMeasures() - .stream() - .filter(sl -> sl.getId().equals("influence")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - Assertions - .assertEquals( - "0.641151896994", tmp - .filter(r -> r.getId().equals(doi1)) - .collect() - .get(0) - .getInstance() - .get(0) - .getMeasures() - .stream() - .filter(sl -> sl.getId().equals("popularity_alt")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - Assertions - .assertEquals( - "2.33375102921e-09", tmp - .filter(r -> r.getId().equals(doi1)) - .collect() - .get(0) - .getInstance() - .get(0) - .getMeasures() - .stream() - .filter(sl -> sl.getId().equals("popularity")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - - final String doi2 = "unresolved::10.3390/s18072310::doi"; - - Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count()); - Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size()); - - } - @Test void fosPrepareTest() throws Exception { final String sourcePath = getClass() @@ -338,57 +252,4 @@ public class PrepareTest { } -// @Test -// void test3() throws Exception { -// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_fos_results_20_12_2021.csv.gz"; -// -// final String outputPath = workingDir.toString() + "/fos.json"; -// GetFOSSparkJob -// .main( -// new String[] { -// "--isSparkSessionManaged", Boolean.FALSE.toString(), -// "--sourcePath", sourcePath, -// -// "-outputPath", outputPath -// -// }); -// -// final JavaSparkContext sc = 
JavaSparkContext.fromSparkContext(spark.sparkContext()); -// -// JavaRDD tmp = sc -// .textFile(outputPath) -// .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); -// -// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); -// tmp.foreach(t -> Assertions.assertTrue(t.getLevel1() != null)); -// tmp.foreach(t -> Assertions.assertTrue(t.getLevel2() != null)); -// tmp.foreach(t -> Assertions.assertTrue(t.getLevel3() != null)); -// -// } -// -// @Test -// void test4() throws Exception { -// final String sourcePath = "/Users/miriam.baglioni/Downloads/doi_sdg_results_20_12_21.csv.gz"; -// -// final String outputPath = workingDir.toString() + "/sdg.json"; -// GetSDGSparkJob -// .main( -// new String[] { -// "--isSparkSessionManaged", Boolean.FALSE.toString(), -// "--sourcePath", sourcePath, -// -// "-outputPath", outputPath -// -// }); -// -// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); -// -// JavaRDD tmp = sc -// .textFile(outputPath) -// .map(item -> OBJECT_MAPPER.readValue(item, SDGDataModel.class)); -// -// tmp.foreach(t -> Assertions.assertTrue(t.getDoi() != null)); -// tmp.foreach(t -> Assertions.assertTrue(t.getSbj() != null)); -// -// } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index fce6c1e97..ce116688a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -340,18 +340,7 @@ public class ProduceTest { } private JavaRDD getResultJavaRDD() throws Exception { - final String bipPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") - .getPath(); - 
PrepareBipFinder - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", bipPath, - "--outputPath", workingDir.toString() + "/work" - - }); final String fosPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") .getPath(); @@ -449,18 +438,7 @@ public class ProduceTest { } private JavaRDD getResultJavaRDDPlusSDG() throws Exception { - final String bipPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") - .getPath(); - PrepareBipFinder - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", bipPath, - "--outputPath", workingDir.toString() + "/work" - - }); final String fosPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") .getPath(); @@ -517,14 +495,6 @@ public class ProduceTest { .filter(row -> row.getSubject() != null) .count()); - Assertions - .assertEquals( - 85, - tmp - .filter(row -> !row.getId().equals(doi)) - .filter(r -> r.getInstance() != null && r.getInstance().size() > 0) - .count()); - } @Test From a460ebe215ebe1f535905d0d3121a84bcd087c2b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 10 Oct 2023 15:50:11 +0200 Subject: [PATCH 48/57] [UnresolvedEntities] updated action name --- .../createunresolvedentities/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml index a5388f28b..c8e9547dc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml @@ -184,7 +184,7 @@ yarn cluster - Saves the result produced for bip and fos by grouping results with the same id + Save the unresolved entities grouping results with the same id eu.dnetlib.dhp.actionmanager.createunresolvedentities.SparkSaveUnresolved dhp-aggregation-${projectVersion}.jar From 05ee7d8b0950bbb93214f7d84d7da9f089526fe3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Oct 2023 09:13:42 +0200 Subject: [PATCH 49/57] [graph cleaning] avoid NPEs --- .../oaf/utils/GraphCleaningFunctions.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 3c3e8052e..324e3dd58 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -509,12 +509,19 @@ public class GraphCleaningFunctions extends CleaningFunctions { // from the script from Dimitris if ("0000".equals(i.getRefereed().getClassid())) { - final boolean isFromCrossref = ModelConstants.CROSSREF_ID - .equals(i.getCollectedfrom().getKey()); - final boolean hasDoi = i - .getPid() - .stream() - .anyMatch(pid -> PidType.doi.toString().equals(pid.getQualifier().getClassid())); + final boolean isFromCrossref = Optional + .ofNullable(i.getCollectedfrom()) + .map(KeyValue::getKey) + .map(id -> id.equals(ModelConstants.CROSSREF_ID)) + .orElse(false); + final boolean hasDoi = Optional + .ofNullable(i.getPid()) + .map( + pid -> pid + .stream() + .anyMatch( + p -> PidType.doi.toString().equals(p.getQualifier().getClassid()))) + .orElse(false); final boolean isPeerReviewedType = PEER_REVIEWED_TYPES .contains(i.getInstancetype().getClassname()); 
final boolean noOtherLitType = r From dda602fff7c65341d0db9dcb9b0b5db2cf5be7ee Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Oct 2023 10:05:46 +0200 Subject: [PATCH 50/57] [AMF] docs --- dhp-workflows/dhp-actionmanager/README.md | 72 +++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 dhp-workflows/dhp-actionmanager/README.md diff --git a/dhp-workflows/dhp-actionmanager/README.md b/dhp-workflows/dhp-actionmanager/README.md new file mode 100644 index 000000000..9899c4a98 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/README.md @@ -0,0 +1,72 @@ +# Action Management Framework + +This module implements the oozie workflow for the integration of pre-built contents into the OpenAIRE Graph. + +Such contents can be + +* brand new, non-existing records to be introduced as nodes of the graph +* updates (or enrichment) for records that does exist in the graph (e.g. a new subject term for a publication) +* relations among existing nodes + +The actionset contents are organised into logical containers, each of them can contain multiple versions contents and is characterised by + +* a name +* an identifier +* the paths on HDFS where each version of the contents is stored + +Each version is then characterised by + +* the creation date +* the last update date +* the indication where it is the latest one or it is an expired version, candidate for garbage collection + +## ActionSet serialization + +Each actionset version contains records compliant to the graph internal data model, i.e. 
subclasses of `eu.dnetlib.dhp.schema.oaf.Oaf`, +defined in the external schemas module + +``` + + eu.dnetlib.dhp + ${dhp-schemas.artifact} + ${dhp-schemas.version} + +``` + +When the actionset contains a relationship, the model class to use is `eu.dnetlib.dhp.schema.oaf.Relation`, otherwise +when the actionset contains an entity, it is a `eu.dnetlib.dhp.schema.oaf.OafEntity` or one of its subclasses +`Datasource`, `Organization`, `Project`, `Result` (or one of its subclasses `Publication`, `Dataset`, etc...). + +Then, each OpenAIRE Graph model class instance must be wrapped using the class `eu.dnetlib.dhp.schema.action.AtomicAction`, a generic +container that defines two attributes + +* `T payload` the OpenAIRE Graph class instance containing the data; +* `Class clazz` must contain the class whose instance is contained in the payload. + +Each AtomicAction can be then serialised in JSON format using `com.fasterxml.jackson.databind.ObjectMapper` from + +``` + + com.fasterxml.jackson.core + jackson-databind + ${dhp.jackson.version} + +``` + +Then, the JSON serialization must be stored as a GZip compressed sequence file (`org.apache.hadoop.mapred.SequenceFileOutputFormat`). +As such, it contains a set of tuples, a key and a value defined as `org.apache.hadoop.io.Text` where + +* the `key` must be set to the class canonical name contained in the `AtomicAction`; +* the `value` must be set to the AtomicAction JSON serialization. 
+ +The following snippet provides an example of how create an actionset version of Relation records: + +``` + rels // JavaRDD + .map(relation -> new AtomicAction(Relation.class, relation)) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); +``` + From 76447958bb538c75872d6b5f0fef184e97b42d55 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Oct 2023 12:23:20 +0200 Subject: [PATCH 51/57] cleanup & docs --- README.md | 128 +++++++++++++++++- dhp-workflows/dhp-distcp/pom.xml | 13 -- .../dhp/distcp/oozie_app/config-default.xml | 18 --- .../dnetlib/dhp/distcp/oozie_app/workflow.xml | 46 ------- dhp-workflows/docs/oozie-installer.markdown | 111 --------------- dhp-workflows/pom.xml | 1 - 6 files changed, 127 insertions(+), 190 deletions(-) delete mode 100644 dhp-workflows/dhp-distcp/pom.xml delete mode 100644 dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/workflow.xml delete mode 100644 dhp-workflows/docs/oozie-installer.markdown diff --git a/README.md b/README.md index 0a0bd82ab..2c1440f44 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,128 @@ # dnet-hadoop -Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning. \ No newline at end of file + +Dnet-hadoop is the project that defined all the [OOZIE workflows](https://oozie.apache.org/) for the OpenAIRE Graph construction, processing, provisioning. + +How to build, package and run oozie workflows +==================== + +Oozie-installer is a utility allowing building, uploading and running oozie workflows. 
In practice, it creates a `*.tar.gz` +package that contains resources that define a workflow and some helper scripts. + +This module is automatically executed when running: + +`mvn package -Poozie-package -Dworkflow.source.dir=classpath/to/parent/directory/of/oozie_app` + +on module having set: + +``` + + eu.dnetlib.dhp + dhp-workflows + +``` + +in `pom.xml` file. `oozie-package` profile initializes oozie workflow packaging, `workflow.source.dir` property points to +a workflow (notice: this is not a relative path but a classpath to directory usually holding `oozie_app` subdirectory). + +The outcome of this packaging is `oozie-package.tar.gz` file containing inside all the resources required to run Oozie workflow: + +- jar packages +- workflow definitions +- job properties +- maintenance scripts + +Required properties +==================== + +In order to include proper workflow within package, `workflow.source.dir` property has to be set. It could be provided +by setting `-Dworkflow.source.dir=some/job/dir` maven parameter. 
+ +In oder to define full set of cluster environment properties one should create `~/.dhp/application.properties` file with +the following properties: + +- `dhp.hadoop.frontend.user.name` - your user name on hadoop cluster and frontend machine +- `dhp.hadoop.frontend.host.name` - frontend host name +- `dhp.hadoop.frontend.temp.dir` - frontend directory for temporary files +- `dhp.hadoop.frontend.port.ssh` - frontend machine ssh port +- `oozieServiceLoc` - oozie service location required by run_workflow.sh script executing oozie job +- `nameNode` - name node address +- `jobTracker` - job tracker address +- `oozie.execution.log.file.location` - location of file that will be created when executing oozie job, it contains output +produced by `run_workflow.sh` script (needed to obtain oozie job id) +- `maven.executable` - mvn command location, requires parameterization due to a different setup of CI cluster +- `sparkDriverMemory` - amount of memory assigned to spark jobs driver +- `sparkExecutorMemory` - amount of memory assigned to spark jobs executors +- `sparkExecutorCores` - number of cores assigned to spark jobs executors + +All values will be overriden with the ones from `job.properties` and eventually `job-override.properties` stored in module's +main folder. + +When overriding properties from `job.properties`, `job-override.properties` file can be created in main module directory +(the one containing `pom.xml` file) and define all new properties which will override existing properties. +One can provide those properties one by one as command line `-D` arguments. + +Properties overriding order is the following: + +1. `pom.xml` defined properties (located in the project root dir) +2. `~/.dhp/application.properties` defined properties +3. `${workflow.source.dir}/job.properties` +4. `job-override.properties` (located in the project root dir) +5. `maven -Dparam=value` + +where the maven `-Dparam` property is overriding all the other ones. 
+ +Workflow definition requirements +==================== + +`workflow.source.dir` property should point to the following directory structure: + + [${workflow.source.dir}] + | + |-job.properties (optional) + | + \-[oozie_app] + | + \-workflow.xml + +This property can be set using maven `-D` switch. + +`[oozie_app]` is the default directory name however it can be set to any value as soon as `oozieAppDir` property is +provided with directory name as value. + +Sub-workflows are supported as well and sub-workflow directories should be nested within `[oozie_app]` directory. + +Creating oozie installer step-by-step +===================================== + +Automated oozie-installer steps are the following: + +1. creating jar packages: `*.jar` and `*tests.jar` along with copying all dependencies in `target/dependencies` +2. reading properties from maven, `~/.dhp/application.properties`, `job.properties`, `job-override.properties` +3. invoking priming mechanism linking resources from import.txt file (currently resolving subworkflow resources) +4. assembling shell scripts for preparing Hadoop filesystem, uploading Oozie application and starting workflow +5. copying whole `${workflow.source.dir}` content to `target/${oozie.package.file.name}` +6. generating updated `job.properties` file in `target/${oozie.package.file.name}` based on maven, +`~/.dhp/application.properties`, `job.properties` and `job-override.properties` +7. creating `lib` directory (or multiple directories for sub-workflows for each nested directory) and copying jar packages +created at step (1) to each one of them +8. bundling whole `${oozie.package.file.name}` directory into single tar.gz package + +Uploading oozie package and running workflow on cluster +======================================================= + +In order to simplify deployment and execution process two dedicated profiles were introduced: + +- `deploy` +- `run` + +to be used along with `oozie-package` profile e.g. 
by providing `-Poozie-package,deploy,run` maven parameters. + +The `deploy` profile supplements packaging process with: +1) uploading oozie-package via scp to `/home/${user.name}/oozie-packages` directory on `${dhp.hadoop.frontend.host.name}` machine +2) extracting uploaded package +3) uploading oozie content to hadoop cluster HDFS location defined in `oozie.wf.application.path` property (generated dynamically by maven build process, based on `${dhp.hadoop.frontend.user.name}` and `workflow.source.dir` properties) + +The `run` profile introduces: +1) executing oozie application uploaded to HDFS cluster using `deploy` command. Triggers `run_workflow.sh` script providing runtime properties defined in `job.properties` file. + +Notice: ssh access to frontend machine has to be configured on system level and it is preferable to set key-based authentication in order to simplify remote operations. \ No newline at end of file diff --git a/dhp-workflows/dhp-distcp/pom.xml b/dhp-workflows/dhp-distcp/pom.xml deleted file mode 100644 index c3d3a7375..000000000 --- a/dhp-workflows/dhp-distcp/pom.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - dhp-workflows - eu.dnetlib.dhp - 1.2.5-SNAPSHOT - - 4.0.0 - - dhp-distcp - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/config-default.xml b/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/config-default.xml deleted file mode 100644 index 905fb9984..000000000 --- a/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/config-default.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - sourceNN - webhdfs://namenode2.hadoop.dm.openaire.eu:50071 - - - oozie.use.system.libpath - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/workflow.xml 
b/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/workflow.xml deleted file mode 100644 index 91b97332b..000000000 --- a/dhp-workflows/dhp-distcp/src/main/resources/eu/dnetlib/dhp/distcp/oozie_app/workflow.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - sourceNN - the source name node - - - sourcePath - the source path - - - targetPath - the target path - - - hbase_dump_distcp_memory_mb - 6144 - memory for distcp action copying InfoSpace dump from remote cluster - - - hbase_dump_distcp_num_maps - 1 - maximum number of simultaneous copies of InfoSpace dump from remote location - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - -Dmapreduce.map.memory.mb=${hbase_dump_distcp_memory_mb} - -pb - -m ${hbase_dump_distcp_num_maps} - ${sourceNN}/${sourcePath} - ${nameNode}/${targetPath} - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/docs/oozie-installer.markdown b/dhp-workflows/docs/oozie-installer.markdown deleted file mode 100644 index d2de80dcc..000000000 --- a/dhp-workflows/docs/oozie-installer.markdown +++ /dev/null @@ -1,111 +0,0 @@ -General notes -==================== - -Oozie-installer is a utility allowing building, uploading and running oozie workflows. In practice, it creates a `*.tar.gz` package that contains resouces that define a workflow and some helper scripts. - -This module is automatically executed when running: - -`mvn package -Poozie-package -Dworkflow.source.dir=classpath/to/parent/directory/of/oozie_app` - -on module having set: - - - eu.dnetlib.dhp - dhp-workflows - - -in `pom.xml` file. `oozie-package` profile initializes oozie workflow packaging, `workflow.source.dir` property points to a workflow (notice: this is not a relative path but a classpath to directory usually holding `oozie_app` subdirectory). 
- -The outcome of this packaging is `oozie-package.tar.gz` file containing inside all the resources required to run Oozie workflow: - -- jar packages -- workflow definitions -- job properties -- maintenance scripts - -Required properties -==================== - -In order to include proper workflow within package, `workflow.source.dir` property has to be set. It could be provided by setting `-Dworkflow.source.dir=some/job/dir` maven parameter. - -In oder to define full set of cluster environment properties one should create `~/.dhp/application.properties` file with the following properties: - -- `dhp.hadoop.frontend.user.name` - your user name on hadoop cluster and frontend machine -- `dhp.hadoop.frontend.host.name` - frontend host name -- `dhp.hadoop.frontend.temp.dir` - frontend directory for temporary files -- `dhp.hadoop.frontend.port.ssh` - frontend machine ssh port -- `oozieServiceLoc` - oozie service location required by run_workflow.sh script executing oozie job -- `nameNode` - name node address -- `jobTracker` - job tracker address -- `oozie.execution.log.file.location` - location of file that will be created when executing oozie job, it contains output produced by `run_workflow.sh` script (needed to obtain oozie job id) -- `maven.executable` - mvn command location, requires parameterization due to a different setup of CI cluster -- `sparkDriverMemory` - amount of memory assigned to spark jobs driver -- `sparkExecutorMemory` - amount of memory assigned to spark jobs executors -- `sparkExecutorCores` - number of cores assigned to spark jobs executors - -All values will be overriden with the ones from `job.properties` and eventually `job-override.properties` stored in module's main folder. - -When overriding properties from `job.properties`, `job-override.properties` file can be created in main module directory (the one containing `pom.xml` file) and define all new properties which will override existing properties. 
One can provide those properties one by one as command line -D arguments. - -Properties overriding order is the following: - -1. `pom.xml` defined properties (located in the project root dir) -2. `~/.dhp/application.properties` defined properties -3. `${workflow.source.dir}/job.properties` -4. `job-override.properties` (located in the project root dir) -5. `maven -Dparam=value` - -where the maven `-Dparam` property is overriding all the other ones. - -Workflow definition requirements -==================== - -`workflow.source.dir` property should point to the following directory structure: - - [${workflow.source.dir}] - | - |-job.properties (optional) - | - \-[oozie_app] - | - \-workflow.xml - -This property can be set using maven `-D` switch. - -`[oozie_app]` is the default directory name however it can be set to any value as soon as `oozieAppDir` property is provided with directory name as value. - -Subworkflows are supported as well and subworkflow directories should be nested within `[oozie_app]` directory. - -Creating oozie installer step-by-step -===================================== - -Automated oozie-installer steps are the following: - -1. creating jar packages: `*.jar` and `*tests.jar` along with copying all dependancies in `target/dependencies` -2. reading properties from maven, `~/.dhp/application.properties`, `job.properties`, `job-override.properties` -3. invoking priming mechanism linking resources from import.txt file (currently resolving subworkflow resources) -4. assembling shell scripts for preparing Hadoop filesystem, uploading Oozie application and starting workflow -5. copying whole `${workflow.source.dir}` content to `target/${oozie.package.file.name}` -6. generating updated `job.properties` file in `target/${oozie.package.file.name}` based on maven, `~/.dhp/application.properties`, `job.properties` and `job-override.properties` -7. 
creating `lib` directory (or multiple directories for subworkflows for each nested directory) and copying jar packages created at step (1) to each one of them -8. bundling whole `${oozie.package.file.name}` directory into single tar.gz package - -Uploading oozie package and running workflow on cluster -======================================================= - -In order to simplify deployment and execution process two dedicated profiles were introduced: - -- `deploy` -- `run` - -to be used along with `oozie-package` profile e.g. by providing `-Poozie-package,deploy,run` maven parameters. - -`deploy` profile supplements packaging process with: -1) uploading oozie-package via scp to `/home/${user.name}/oozie-packages` directory on `${dhp.hadoop.frontend.host.name}` machine -2) extracting uploaded package -3) uploading oozie content to hadoop cluster HDFS location defined in `oozie.wf.application.path` property (generated dynamically by maven build process, based on `${dhp.hadoop.frontend.user.name}` and `workflow.source.dir` properties) - -`run` profile introduces: -1) executing oozie application uploaded to HDFS cluster using `deploy` command. Triggers `run_workflow.sh` script providing runtime properties defined in `job.properties` file. - -Notice: ssh access to frontend machine has to be configured on system level and it is preferable to set key-based authentication in order to simplify remote operations. 
\ No newline at end of file diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 64f5f2d26..369c71b5b 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -25,7 +25,6 @@ dhp-workflow-profiles dhp-aggregation - dhp-distcp dhp-actionmanager dhp-graph-mapper dhp-dedup-openaire From 6cf64d5d8b3b9826bce76d94f0548ee96eff2736 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Oct 2023 10:09:26 +0200 Subject: [PATCH 52/57] [SWH] renamed 'Software Heritage Identifier' to 'Software Hash Identifier' --- .../src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java index eae839cfd..2a0403044 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHConstants.java @@ -12,7 +12,7 @@ public class SWHConstants { public static final String SWHID = "swhid"; - public static final String SWHID_CLASSNAME = "Software Heritage Identifier"; + public static final String SWHID_CLASSNAME = "Software Hash Identifier"; public static final String SWH_ID = "10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4"; From 03670bb9ce8609a17277e2d6ab6e53190cc8fe7e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 16 Oct 2023 10:55:47 +0200 Subject: [PATCH 53/57] [dedup] use common saveParquet and save methods to ensure outputs are compressed --- .../dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java | 10 +++++----- .../dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java | 7 +------ .../eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java | 2 +- .../dhp/oa/dedup/SparkCreateOrgsDedupRecord.java | 6 +----- .../eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java | 2 -- .../eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java | 2 -- 6 files changed, 8 insertions(+), 
21 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java index 9d0f61007..eca2193af 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsMergeRels.java @@ -7,6 +7,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; @@ -77,13 +78,12 @@ public class SparkCopyOpenorgsMergeRels extends AbstractSparkAction { log.info("Number of Openorgs Merge Relations collected: {}", mergeRelsRDD.count()); - spark + final Dataset relations = spark .createDataset( mergeRelsRDD.rdd(), - Encoders.bean(Relation.class)) - .write() - .mode(SaveMode.Append) - .parquet(outputPath); + Encoders.bean(Relation.class)); + + saveParquet(relations, outputPath, SaveMode.Append); } private boolean isMergeRel(Relation rel) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 62cbb5bff..e10f41c82 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -67,12 +67,7 @@ public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { log.debug("Number of non-Openorgs relations collected: {}", simRels.count()); } - spark - .createDataset(simRels.rdd(), 
Encoders.bean(Relation.class)) - .write() - .mode(SaveMode.Overwrite) - .json(outputPath); - + save(spark.createDataset(simRels.rdd(), Encoders.bean(Relation.class)), outputPath, SaveMode.Overwrite); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 2f551b244..babbaaabd 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -155,7 +155,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { (FlatMapFunction) cc -> ccToMergeRel(cc, dedupConf), Encoders.bean(Relation.class)); - mergeRels.write().mode(SaveMode.Overwrite).parquet(mergeRelPath); + saveParquet(mergeRels, mergeRelPath, SaveMode.Overwrite); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java index 8e5e9fd69..25e394f25 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java @@ -72,11 +72,7 @@ public class SparkCreateOrgsDedupRecord extends AbstractSparkAction { final String mergeRelsPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); - rootOrganization(spark, entityPath, mergeRelsPath) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + save(rootOrganization(spark, entityPath, mergeRelsPath), outputPath, SaveMode.Overwrite); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java index 5b3cc3111..5f54c34df 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java @@ -82,8 +82,6 @@ public class SparkCreateSimRels extends AbstractSparkAction { final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); removeOutputDir(spark, outputPath); - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - SparkDeduper deduper = new SparkDeduper(dedupConf); Dataset simRels = spark diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java index 94a09ed05..65ad0c327 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -67,8 +67,6 @@ public class SparkWhitelistSimRels extends AbstractSparkAction { log.info("workingPath: '{}'", workingPath); log.info("whiteListPath: '{}'", whiteListPath); - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - // file format: source####target Dataset whiteListRels = spark .read() From 0e44b037a52558e20bbe418a5d313fc7fd8e966f Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Tue, 17 Oct 2023 07:54:01 +0200 Subject: [PATCH 54/57] FIX: GroupEntitiesSparkJob deletes whole graph outputPath instead of its temporary folder --- .../java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index 99981bf6a..f5c8eea19 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -71,7 +71,7 @@ public class GroupEntitiesSparkJob { conf, isSparkSessionManaged, spark -> { - HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); + HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration()); groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible); }); } From b0fed1725edc8c000619906751ad46a105c9449b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 19 Oct 2023 12:13:45 +0200 Subject: [PATCH 55/57] avoid NPEs --- .../oaf/utils/GraphCleaningFunctions.java | 10 ++++++ .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 7 ++-- .../PrepareResultCountrySet.java | 32 ++++++++++++++----- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 324e3dd58..b4402a2fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -286,6 +286,12 @@ public class GraphCleaningFunctions extends CleaningFunctions { public static T cleanup(T value, VocabularyGroup vocs) { + if (Objects.isNull(value.getDataInfo())) { + final DataInfo d = new DataInfo(); + d.setDeletedbyinference(false); + value.setDataInfo(d); + } + if (value instanceof OafEntity) { OafEntity e = (OafEntity) value; @@ -305,6 +311,10 @@ public class GraphCleaningFunctions extends CleaningFunctions { } else if (value instanceof Result) { Result r = (Result) value; + if (Objects.isNull(r.getContext())) { + r.setContext(new ArrayList<>()); + } + if 
(Objects.nonNull(r.getFulltext()) && (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) || ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 3186ed5c0..fc3882b73 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -25,6 +25,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class SparkBulkTagJob { @@ -170,10 +171,12 @@ public class SparkBulkTagJob { // TODO remove this hack as soon as the values fixed by this method will be provided as NON null private static MapFunction patchResult() { return r -> { - if (r.getDataInfo().getDeletedbyinference() == null) { + if (Objects.isNull(r.getDataInfo())) { + r.setDataInfo(OafMapperUtils.dataInfo(false, "", false, false, OafMapperUtils.unknown("", ""), "")); + } else if (r.getDataInfo().getDeletedbyinference() == null) { r.getDataInfo().setDeletedbyinference(false); } - if (r.getContext() == null) { + if (Objects.isNull(r.getContext())) { r.setContext(new ArrayList<>()); } return r; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 28b6f616d..184d24751 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -5,10 +5,7 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -88,14 +85,33 @@ public class PrepareResultCountrySet { // selects all the results non deleted by inference and non invisible Dataset result = readPath(spark, inputPath, resultClazz) .filter( - (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && - !r.getDataInfo().getInvisible()); + (FilterFunction) r -> Optional + .ofNullable(r.getDataInfo()) + .map(dataInfo -> !dataInfo.getDeletedbyinference() && !dataInfo.getInvisible()) + .orElse(true)); // of the results collects the distinct keys for collected from (at the level of the result) and hosted by // and produces pairs resultId, key for each distinct key associated to the result result.flatMap((FlatMapFunction) r -> { - Set cfhb = r.getCollectedfrom().stream().map(cf -> cf.getKey()).collect(Collectors.toSet()); - cfhb.addAll(r.getInstance().stream().map(i -> i.getHostedby().getKey()).collect(Collectors.toSet())); + Set cfhb = Optional + .ofNullable(r.getCollectedfrom()) + .map(cf -> cf.stream().map(KeyValue::getKey).collect(Collectors.toSet())) + .orElse(new HashSet<>()); + cfhb + .addAll( + Optional + .ofNullable(r.getInstance()) + .map( + i -> i + .stream() + .map( + ii -> Optional + .ofNullable(ii.getHostedby()) + .map(KeyValue::getKey) + .orElse(null)) + .filter(Objects::nonNull) + .collect(Collectors.toSet())) + .orElse(new HashSet<>())); return cfhb .stream() .map(value -> EntityEntityRel.newInstance(r.getId(), value)) From 7fc621cdecaa23f57aa7744f51b3f15c46366dd3 Mon 
Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 20 Oct 2023 22:28:12 +0200 Subject: [PATCH 56/57] added defaults to the graph resolution workflow config-default.xml --- .../resolution/oozie_app/config-default.xml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml index 6fb2a1253..86847ed46 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml @@ -1,4 +1,12 @@ + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + oozie.use.system.libpath true @@ -7,4 +15,28 @@ oozie.action.sharelib.for.spark spark2 + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + \ No newline at end of file From a870aa2b093929e190ae48cbb15cf98d732e2926 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 20 Oct 2023 22:28:39 +0200 Subject: [PATCH 57/57] depending on dhp-schemas:3.17.2 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9cd82a343..f361a266c 100644 --- a/pom.xml +++ b/pom.xml @@ -888,7 +888,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [3.17.1] + [3.17.2] [4.0.3] [6.0.5] [3.1.6]