forked from D-Net/dnet-hadoop
Merge pull request 'added the missing indicators files' (#120) from antonis.lempesis/dnet-hadoop:stable_ids into stable_ids
Reviewed-on: D-Net/dnet-hadoop#120
This commit is contained in:
commit
6d3f960238
|
@ -0,0 +1,19 @@
|
|||
# Creates the indicator tables in an Impala database.
#
# Usage: <script> TARGET SCRIPT_PATH
#   TARGET       Impala database the statements are run against.
#   SCRIPT_PATH  HDFS path that is copied into the current working directory
#                before the indicators SQL is executed.
#
# Steps: prepare the impala-shell Python egg cache link, fetch SCRIPT_PATH from
# HDFS, refresh metadata and table statistics of TARGET, then run
# step16_7-createIndicatorsTables.sql against TARGET.

# Redirect the per-user egg cache folder under /tmp to a folder below the
# user's home directory by means of a symbolic link.
export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
if ! [ -L "$link_folder" ]
then
    # Anything that is not a symlink (plain directory or file) is replaced.
    rm -Rf "$link_folder"
    # Make sure the link target exists so the link is not left dangling.
    mkdir -p "${PYTHON_EGG_CACHE}${link_folder}"
    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi

# Both positional arguments are required; fail early instead of running
# impala-shell against an empty database name.
if [ "$#" -lt 2 ]
then
    echo "Usage: $0 TARGET SCRIPT_PATH" >&2
    exit 1
fi

export TARGET=$1
export SCRIPT_PATH=$2

echo "Getting file from " "$SCRIPT_PATH"
hdfs dfs -copyToLocal "$SCRIPT_PATH"

echo "Creating indicators"
impala-shell -d "${TARGET}" -q "invalidate metadata"
# Turn every table name listed by "show tables" into a "compute stats" statement
# and feed the generated statements to a second impala-shell.
impala-shell -d "${TARGET}" -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f -
# NOTE(review): the SQL file name is hard-coded; presumably SCRIPT_PATH points at
# a file with this name (or a location that yields it locally) - confirm with the caller.
impala-shell -d "$TARGET" -f - < step16_7-createIndicatorsTables.sql
echo "Indicators created"
|
|
@ -0,0 +1,42 @@
|
|||
-- indi_pub_green_oa: one row per distinct publication id with a 0/1 green_oa flag.
-- The flag is 1 when the publication has a result_instance hosted by a datasource
-- whose type contains 'Repository' and whose access right is 'Open Access' or
-- 'Embargo'; publications without such an instance get 0.
create table indi_pub_green_oa stored as parquet as
select distinct
    pub.id,
    coalesce(flagged.green_oa, 0) as green_oa
from publication pub
left join (
    select
        p.id,
        1 as green_oa
    from publication p
    inner join result_instance ri
        on ri.id = p.id
    inner join datasource ds
        on ds.id = ri.hostedby
    where ds.type like '%Repository%'
        and ri.accessright in ('Open Access', 'Embargo')
) flagged
    on pub.id = flagged.id;
|
||||
|
||||
-- indi_pub_grey_lit: one row per distinct publication id with a 0/1 grey_lit flag.
-- The flag is 1 when the publication has a classification whose type is outside
-- the listed types and it has no 'Other literature type' classification;
-- all other publications get 0.
create table indi_pub_grey_lit stored as parquet as
select distinct
    pub.id,
    coalesce(flagged.grey_lit, 0) as grey_lit
from publication pub
left join (
    select
        p.id,
        1 as grey_lit
    from publication p
    inner join result_classifications rt
        on rt.id = p.id
    where rt.type not in (
            'Article',
            'Part of book or chapter of book',
            'Book',
            'Doctoral thesis',
            'Master thesis',
            'Data Paper',
            'Thesis',
            'Bachelor thesis',
            'Conference object'
        )
        and not exists (
            select 1
            from result_classifications rc
            where rc.type = 'Other literature type'
                and rc.id = p.id
        )
) flagged
    on pub.id = flagged.id;
|
||||
|
||||
-- indi_pub_doi_from_crossref: one row per distinct publication id with a 0/1
-- doi_from_crossref flag. The flag is 1 when a result_instance with the same id
-- has pidtype 'Digital Object Identifier' and was collected from the datasource
-- named 'Crossref'; all other publications get 0.
-- NOTE(review): pidtype is left unqualified as in the original; presumably it is
-- a column of result_instance - confirm against the schema.
create table indi_pub_doi_from_crossref stored as parquet as
select distinct
    pub.id,
    coalesce(flagged.doi_from_crossref, 0) as doi_from_crossref
from publication pub
left join (
    select
        ri.id,
        1 as doi_from_crossref
    from result_instance ri
    inner join datasource d
        on d.id = ri.collectedfrom
    where pidtype = 'Digital Object Identifier'
        and d.name = 'Crossref'
) flagged
    on flagged.id = pub.id;
|
||||
|
||||
-- indi_pub_gold_oa: one row per distinct publication id with a 0/1 gold_oa flag.
-- The flag is 1 when the publication has a result_instance hosted by a datasource
-- whose id contains 'doajarticles'; all other publications get 0.
create table indi_pub_gold_oa stored as parquet as
select distinct
    pub.id,
    coalesce(flagged.gold_oa, 0) as gold_oa
from publication pub
left join (
    select
        p.id,
        1 as gold_oa
    from publication p
    inner join result_instance ri
        on ri.id = p.id
    inner join datasource ds
        on ds.id = ri.hostedby
    where ds.id like '%doajarticles%'
) flagged
    on pub.id = flagged.id;
|
Loading…
Reference in New Issue