From 87f14a38991986b5929e1d26255f8d9c9ddac25e Mon Sep 17 00:00:00 2001
From: antleb
Date: Tue, 29 Jun 2021 16:31:51 +0300
Subject: [PATCH] added the missing indicators files

---
 .../oa/graph/stats/oozie_app/indicators.sh    | 44 ++++++++++
 .../step16_7-createIndicatorsTables.sql       | 85 +++++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
 create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql

diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
new file mode 100644
index 000000000..d5aa207d1
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
@@ -0,0 +1,44 @@
+# Creates the indicator tables of a stats database through impala-shell.
+#
+# Usage: indicators.sh TARGET SCRIPT_PATH
+#   TARGET       database handed to impala-shell via -d
+#   SCRIPT_PATH  HDFS location of step16_7-createIndicatorsTables.sql
+
+export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
+export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
+# Anything at $link_folder that is not already a symlink is removed and
+# replaced by a symlink pointing below PYTHON_EGG_CACHE.
+if ! [ -L "$link_folder" ]
+then
+    rm -Rf "$link_folder"
+    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
+fi
+
+if [ "$#" -lt 2 ]
+then
+    echo "Usage: $0 TARGET SCRIPT_PATH" >&2
+    exit 1
+fi
+
+export TARGET=$1
+export SCRIPT_PATH=$2
+# Local file name the HDFS copy is expected to produce.
+SQL_FILE=step16_7-createIndicatorsTables.sql
+
+echo "Getting file from  $SCRIPT_PATH"
+hdfs dfs -copyToLocal "$SCRIPT_PATH"
+# Stop early when the copy did not produce the script: there is nothing to
+# run, and reporting success would be misleading.
+if ! [ -f "$SQL_FILE" ]
+then
+    echo "Missing $SQL_FILE after copying $SCRIPT_PATH" >&2
+    exit 1
+fi
+
+echo "Creating indicators"
+impala-shell -d "${TARGET}" -q "invalidate metadata"
+# Turn every name listed by "show tables" into a "compute stats" statement;
+# -c keeps the second impala-shell going when one of them fails.
+impala-shell -d "${TARGET}" -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f -
+impala-shell -d "$TARGET" -f "$SQL_FILE"
+echo "Indicators created"
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql
new file mode 100644
index 000000000..8998cb9fc
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql
@@ -0,0 +1,85 @@
+-- Indicator tables for publications. Each table holds a 0/1 flag per
+-- publication id, with 0 for ids that have no matching row in the joined
+-- subquery. Every table is dropped first so the script can be run again.
+-- The subqueries return each id once, which keeps the outer join from
+-- multiplying publication rows. The outer distinct is kept because this
+-- script does not show whether publication.id is unique.
+
+-- green_oa is 1 when an instance is hosted by a datasource whose type
+-- contains Repository and its access right is Open Access or Embargo.
+drop table if exists indi_pub_green_oa;
+create table indi_pub_green_oa stored as parquet as
+select distinct
+    p.id,
+    coalesce(tmp.green_oa, 0) as green_oa
+from publication p
+left join (
+    select distinct
+        ri.id,
+        1 as green_oa
+    from result_instance ri
+    inner join datasource d on d.id = ri.hostedby
+    where d.type like '%Repository%'
+        and ri.accessright in ('Open Access', 'Embargo')
+) tmp on tmp.id = p.id;
+
+-- grey_lit is 1 when a classification of the publication is outside the
+-- listed types and none of its classifications is Other literature type.
+drop table if exists indi_pub_grey_lit;
+create table indi_pub_grey_lit stored as parquet as
+select distinct
+    p.id,
+    coalesce(tmp.grey_lit, 0) as grey_lit
+from publication p
+left join (
+    select distinct
+        rt.id,
+        1 as grey_lit
+    from result_classifications rt
+    where rt.type not in (
+            'Article', 'Part of book or chapter of book', 'Book',
+            'Doctoral thesis', 'Master thesis', 'Data Paper', 'Thesis',
+            'Bachelor thesis', 'Conference object'
+        )
+        and not exists (
+            select 1
+            from result_classifications rc
+            where rc.type = 'Other literature type'
+                and rc.id = rt.id
+        )
+) tmp on tmp.id = p.id;
+
+-- doi_from_crossref is 1 when an instance collected from the datasource
+-- named Crossref has the pid type Digital Object Identifier.
+drop table if exists indi_pub_doi_from_crossref;
+create table indi_pub_doi_from_crossref stored as parquet as
+select distinct
+    p.id,
+    coalesce(tmp.doi_from_crossref, 0) as doi_from_crossref
+from publication p
+left join (
+    select distinct
+        ri.id,
+        1 as doi_from_crossref
+    from result_instance ri
+    inner join datasource d on d.id = ri.collectedfrom
+    where pidtype = 'Digital Object Identifier'
+        and d.name = 'Crossref'
+) tmp on tmp.id = p.id;
+
+-- gold_oa is 1 when an instance is hosted by a datasource whose id
+-- contains doajarticles.
+drop table if exists indi_pub_gold_oa;
+create table indi_pub_gold_oa stored as parquet as
+select distinct
+    p.id,
+    coalesce(tmp.gold_oa, 0) as gold_oa
+from publication p
+left join (
+    select distinct
+        ri.id,
+        1 as gold_oa
+    from result_instance ri
+    inner join datasource d on d.id = ri.hostedby
+    where d.id like '%doajarticles%'
+) tmp on tmp.id = p.id;