forked from D-Net/dnet-hadoop
Merge pull request 'added the missing indicators files' (#120) from antonis.lempesis/dnet-hadoop:stable_ids into stable_ids
Reviewed-on: D-Net/dnet-hadoop#120
This commit is contained in:
commit
6d3f960238
|
@ -0,0 +1,19 @@
|
||||||
|
# Per-user Python egg cache setup (the path names suggest this is for
# impala-shell; NOTE(review): confirm against the cluster setup).
# PYTHON_EGG_CACHE and link_folder are exported so child processes see them.
# Assignment and export are split so a failing `whoami` is not masked by export.
PYTHON_EGG_CACHE="/home/$(whoami)/.python-eggs"
export PYTHON_EGG_CACHE
link_folder="/tmp/impala-shell-python-egg-cache-$(whoami)"
export link_folder

# Rebuild only when the /tmp path is not already a symlink: whatever is there
# (plain file, directory, or nothing) is removed and replaced by a link that
# points at "<PYTHON_EGG_CACHE><link_folder>".
if ! [ -L "$link_folder" ]
then
    rm -Rf "$link_folder"
    # NOTE(review): the link target is not created here; presumably it is
    # created on demand by whatever writes the cache -- confirm.
    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "$link_folder"
fi
|
||||||
|
|
||||||
|
# Arguments:
#   $1  TARGET       Impala database the indicator tables are created in
#   $2  SCRIPT_PATH  HDFS path copied into the current working directory
# Exit status: 1 on missing arguments or missing SQL file, otherwise the
# status of the impala-shell run that executes the indicators SQL.
if [ "$#" -lt 2 ]
then
    echo "Usage: $0 <target_db> <hdfs_script_path>" >&2
    exit 1
fi

export TARGET="$1"
export SCRIPT_PATH="$2"

# SQL file executed below; it must be present in the working directory once
# the HDFS copy has run.
indicators_sql="step16_7-createIndicatorsTables.sql"

echo "Getting file from " "$SCRIPT_PATH"
hdfs dfs -copyToLocal "$SCRIPT_PATH"

# Checked here rather than on the copy's exit status, so a file left by an
# earlier run in this directory is still accepted.
if ! [ -f "$indicators_sql" ]
then
    echo "Missing ${indicators_sql} in $(pwd); cannot create indicators" >&2
    exit 1
fi

echo "Creating indicators"
impala-shell -d "$TARGET" -q "invalidate metadata"

# Turn every table name of TARGET into a "compute stats TARGET.<table>;"
# statement and run them all; -c keeps going when an individual query fails.
impala-shell -d "$TARGET" -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f -

# Feed the SQL file on stdin (same as the former `cat file | impala-shell`)
# and report success only when impala-shell itself succeeded.
if impala-shell -d "$TARGET" -f - < "$indicators_sql"
then
    echo "Indicators created"
else
    status=$?
    echo "Creating indicators failed (impala-shell exit status ${status})" >&2
    exit "$status"
fi
|
|
@ -0,0 +1,42 @@
|
||||||
|
-- indi_pub_green_oa: one row per publication id with a 0/1 green_oa flag.
-- The flag is 1 when the publication has at least one result_instance whose
-- hostedby datasource has a type containing 'Repository' and whose
-- accessright is 'Open Access' or 'Embargo'; otherwise it is 0.
create table indi_pub_green_oa stored as parquet as
select distinct
    pub.id,
    coalesce(green.green_oa, 0) as green_oa
from publication pub
left join (
    -- can yield several rows per publication (one per matching instance);
    -- the outer distinct collapses them again
    select
        p.id,
        1 as green_oa
    from publication p
    inner join result_instance ri
        on ri.id = p.id
    inner join datasource ds
        on ds.id = ri.hostedby
    where ds.type like '%Repository%'
      and ri.accessright in ('Open Access', 'Embargo')
) green
    on pub.id = green.id;
|
||||||
|
|
||||||
|
-- indi_pub_grey_lit: one row per publication id with a 0/1 grey_lit flag.
-- The flag is 1 when the publication has a classification outside the listed
-- types and has no 'Other literature type' classification at all;
-- otherwise it is 0.
create table indi_pub_grey_lit stored as parquet as
select distinct
    pub.id,
    coalesce(grey.grey_lit, 0) as grey_lit
from publication pub
left join (
    select
        p.id,
        1 as grey_lit
    from publication p
    inner join result_classifications rt
        on rt.id = p.id
    where rt.type not in (
              'Article',
              'Part of book or chapter of book',
              'Book',
              'Doctoral thesis',
              'Master thesis',
              'Data Paper',
              'Thesis',
              'Bachelor thesis',
              'Conference object'
          )
      -- exclude any publication that carries 'Other literature type'
      and not exists (
              select 1
              from result_classifications rc
              where rc.type = 'Other literature type'
                and rc.id = p.id
          )
) grey
    on pub.id = grey.id;
|
||||||
|
|
||||||
|
-- indi_pub_doi_from_crossref: one row per publication id with a 0/1
-- doi_from_crossref flag. The flag is 1 when a result_instance with the same
-- id has pidtype 'Digital Object Identifier' and its collectedfrom datasource
-- is named 'Crossref'; otherwise it is 0.
create table indi_pub_doi_from_crossref stored as parquet as
select distinct
    pub.id,
    coalesce(crossref.doi_from_crossref, 0) as doi_from_crossref
from publication pub
left join (
    select
        ri.id,
        1 as doi_from_crossref
    from result_instance ri
    inner join datasource d
        on d.id = ri.collectedfrom
    -- NOTE(review): pidtype is deliberately left unqualified; presumably a
    -- result_instance column -- confirm before qualifying it
    where pidtype = 'Digital Object Identifier'
      and d.name = 'Crossref'
) crossref
    on crossref.id = pub.id;
|
||||||
|
|
||||||
|
-- indi_pub_gold_oa: one row per publication id with a 0/1 gold_oa flag.
-- The flag is 1 when the publication has at least one result_instance whose
-- hostedby datasource id contains 'doajarticles'; otherwise it is 0.
create table indi_pub_gold_oa stored as parquet as
select distinct
    pub.id,
    coalesce(gold.gold_oa, 0) as gold_oa
from publication pub
left join (
    -- can yield several rows per publication (one per matching instance);
    -- the outer distinct collapses them again
    select
        p.id,
        1 as gold_oa
    from publication p
    inner join result_instance ri
        on ri.id = p.id
    inner join datasource ds
        on ds.id = ri.hostedby
    where ds.id like '%doajarticles%'
) gold
    on pub.id = gold.id;
|
Loading…
Reference in New Issue