Merge pull request 'stats DB for monitor' (#99) from antonis.lempesis/dnet-hadoop:master into master
Looks good to me. Just a note on the parsing of the citations: since the last version, IIS produces citations as proper relationships among results. This is what we already have in the BETA graph:
```
count       r.reltype     r.subreltype  r.relclass
62.129.254  resultResult  citation      cites
62.043.309  resultResult  citation      isCitedBy
```
Thus, I suggest moving away from the current property-based implementation for the extraction of the citation links and relying on the relationships instead.
pull/115/head
commit
e8789b0cdb
@ -0,0 +1,18 @@
|
||||
# Refreshes Impala metadata and table statistics for one database, then
# rebuilds a second ("shadow") database as one "select *" view per table of
# the first.
#
# Arguments:
#   $1  SOURCE  database whose tables are refreshed and exposed
#   $2  SHADOW  database that receives the views (created when missing)

# Per-user Python egg cache. Whatever sits at $link_folder and is not already
# a symbolic link is removed and replaced by one.
export PYTHON_EGG_CACHE="/home/$(whoami)/.python-eggs"
export link_folder="/tmp/impala-shell-python-egg-cache-$(whoami)"
if ! [ -L "$link_folder" ]
then
    rm -Rf "$link_folder"
    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi

# Stop right away when an argument is missing: with an empty database name the
# statements generated below would be malformed.
export SOURCE="${1:?missing argument 1: source database name}"
export SHADOW="${2:?missing argument 2: shadow database name}"

echo "Updating shadow database"
impala-shell -d "${SOURCE}" -q "invalidate metadata"

# Turn every name listed by "show tables" into a "compute stats" statement.
# impala-shell -c continues with the next statement when one fails.
impala-shell -d "${SOURCE}" -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f -

impala-shell -q "create database if not exists ${SHADOW}"

# Drop the views currently listed in the shadow database, then create one view
# per table of the source database.
impala-shell -d "${SHADOW}" -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -c -f -
impala-shell -d "${SOURCE}" -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -

echo "Shadow db ready!"
|
@ -1,18 +0,0 @@
|
||||
# Fetches a file from HDFS, refreshes Impala metadata and runs a local script
# through impala-shell, removing that script afterwards.
#
# Arguments:
#   $1  database passed to impala-shell -d
#   $2  local file passed to impala-shell -f and deleted at the end
#   $3  path handed to "hdfs dfs -copyToLocal" (no explicit destination)

# Per-user Python egg cache. Whatever sits at $link_folder and is not already
# a symbolic link is removed and replaced by one.
export PYTHON_EGG_CACHE="/home/$(whoami)/.python-eggs"
export link_folder="/tmp/impala-shell-python-egg-cache-$(whoami)"
if ! [ -L "$link_folder" ]
then
    rm -Rf "$link_folder"
    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi

# All positional parameters are quoted so that a path containing blanks or
# glob characters is passed through as a single, literal argument.
echo "Getting file from " "$3"
hdfs dfs -copyToLocal "$3"

echo "Running impala shell make the new database visible"
impala-shell -q "INVALIDATE METADATA;"

echo "Running impala shell to compute new table stats"
impala-shell -d "$1" -f "$2"
echo "Impala shell finished"
rm "$2"
|
@ -0,0 +1,25 @@
|
||||
# Builds the monitor database from a SQL template and then rebuilds a shadow
# database as one "select *" view per object of the monitor database.
#
# Arguments:
#   $1  SOURCE       database name substituted for the SOURCE placeholder
#   $2  TARGET       database name substituted for the TARGET placeholder
#   $3  SHADOW       database that receives the views (created when missing)
#   $4  SCRIPT_PATH  path handed to "hdfs dfs -copyToLocal"; the template is
#                    then read from ./step20-createMonitorDB.sql

# Per-user Python egg cache. Whatever sits at $link_folder and is not already
# a symbolic link is removed and replaced by one.
export PYTHON_EGG_CACHE="/home/$(whoami)/.python-eggs"
export link_folder="/tmp/impala-shell-python-egg-cache-$(whoami)"
if ! [ -L "$link_folder" ]
then
    rm -Rf "$link_folder"
    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi

# Stop right away when an argument is missing: an empty name would turn the
# template into "drop database if exists  cascade" and similar broken SQL.
export SOURCE="${1:?missing argument 1: source database name}"
export TARGET="${2:?missing argument 2: target (monitor) database name}"
export SHADOW="${3:?missing argument 3: shadow database name}"
export SCRIPT_PATH="${4:?missing argument 4: HDFS path of the SQL template}"

echo "Getting file from " "${SCRIPT_PATH}"
hdfs dfs -copyToLocal "${SCRIPT_PATH}"

echo "Creating monitor database"
# Both placeholders are replaced globally, SOURCE first and TARGET second.
# The flag is a plain "g": mixing "g" with a number is unspecified by POSIX.
sed -e "s/SOURCE/${SOURCE}/g" -e "s/TARGET/${TARGET}/g" step20-createMonitorDB.sql | impala-shell -f -
echo "Impala shell finished"

echo "Updating shadow monitor database"
impala-shell -q "create database if not exists ${SHADOW}"
# Drop the views currently listed in the shadow database, then create one view
# per object listed in the freshly built monitor database.
impala-shell -d "${SHADOW}" -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f -
impala-shell -d "${TARGET}" -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f -
echo "Shadow db ready!"
|
@ -1,207 +0,0 @@
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
-- Shadow schema table exchange
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
|
||||
-- Dropping old views
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.category;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.concept;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.context;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.country;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.countrygdp;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.creation_date;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_citations;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_classifications;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_concepts;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_datasources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_languages;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_licenses;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_oids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_pids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_refereed;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_sources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_topics;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_languages;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_oids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_organizations;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_results;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_sources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.funder;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.fundref;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.numbers_country;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_datasources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_pids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_projects;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_sources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_concepts;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_datasources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_oids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_organizations;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_resultcount;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results_publication;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_citations;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_classifications;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_concepts;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_datasources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_languages;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_licenses;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_oids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_pids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_refereed;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_sources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_topics;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_affiliated_country;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_citations;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_classifications;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_concepts;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_datasources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_deposited_country;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_fundercount;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_gold;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_greenoa;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_languages;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_licenses;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_oids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_organization;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_peerreviewed;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_pids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projectcount;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projects;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_refereed;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_sources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_topics;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.rndexpediture;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.roarmap;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_citations;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_classifications;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_concepts;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_datasources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_languages;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_licenses;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_oids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_pids;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_refereed;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_sources;
|
||||
DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_topics;
|
||||
|
||||
|
||||
-- Creating the shadow database, in case it doesn't exist
|
||||
CREATE database IF NOT EXISTS ${stats_db_shadow_name};
|
||||
|
||||
-- Creating new views
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.category AS SELECT * FROM ${stats_db_name}.category;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.concept AS SELECT * FROM ${stats_db_name}.concept;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.context AS SELECT * FROM ${stats_db_name}.context;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.country AS SELECT * FROM ${stats_db_name}.country;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.countrygdp AS SELECT * FROM ${stats_db_name}.countrygdp;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.creation_date AS SELECT * FROM ${stats_db_name}.creation_date;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset AS SELECT * FROM ${stats_db_name}.dataset;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_citations AS SELECT * FROM ${stats_db_name}.dataset_citations;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_classifications AS SELECT * FROM ${stats_db_name}.dataset_classifications;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_concepts AS SELECT * FROM ${stats_db_name}.dataset_concepts;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_datasources AS SELECT * FROM ${stats_db_name}.dataset_datasources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_languages AS SELECT * FROM ${stats_db_name}.dataset_languages;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_licenses AS SELECT * FROM ${stats_db_name}.dataset_licenses;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_oids AS SELECT * FROM ${stats_db_name}.dataset_oids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_pids AS SELECT * FROM ${stats_db_name}.dataset_pids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_refereed AS SELECT * FROM ${stats_db_name}.dataset_refereed;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_sources AS SELECT * FROM ${stats_db_name}.dataset_sources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_topics AS SELECT * FROM ${stats_db_name}.dataset_topics;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource AS SELECT * FROM ${stats_db_name}.datasource;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_languages AS SELECT * FROM ${stats_db_name}.datasource_languages;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_oids AS SELECT * FROM ${stats_db_name}.datasource_oids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_organizations AS SELECT * FROM ${stats_db_name}.datasource_organizations;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_results AS SELECT * FROM ${stats_db_name}.datasource_results;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_sources AS SELECT * FROM ${stats_db_name}.datasource_sources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.funder AS SELECT * FROM ${stats_db_name}.funder;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.fundref AS SELECT * FROM ${stats_db_name}.fundref;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.numbers_country AS SELECT * FROM ${stats_db_name}.numbers_country;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization AS SELECT * FROM ${stats_db_name}.organization;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_datasources AS SELECT * FROM ${stats_db_name}.organization_datasources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_pids AS SELECT * FROM ${stats_db_name}.organization_pids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_projects AS SELECT * FROM ${stats_db_name}.organization_projects;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_sources AS SELECT * FROM ${stats_db_name}.organization_sources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct AS SELECT * FROM ${stats_db_name}.otherresearchproduct;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations AS SELECT * FROM ${stats_db_name}.otherresearchproduct_citations;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications AS SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_concepts AS SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_datasources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages AS SELECT * FROM ${stats_db_name}.otherresearchproduct_languages;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses AS SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_oids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_pids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed AS SELECT * FROM ${stats_db_name}.otherresearchproduct_refereed;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics AS SELECT * FROM ${stats_db_name}.otherresearchproduct_topics;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project AS SELECT * FROM ${stats_db_name}.project;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_oids AS SELECT * FROM ${stats_db_name}.project_oids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_organizations AS SELECT * FROM ${stats_db_name}.project_organizations;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results AS SELECT * FROM ${stats_db_name}.project_results;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_resultcount AS SELECT * FROM ${stats_db_name}.project_resultcount;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results_publication AS SELECT * FROM ${stats_db_name}.project_results_publication;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication AS SELECT * FROM ${stats_db_name}.publication;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_citations AS SELECT * FROM ${stats_db_name}.publication_citations;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_datasources AS SELECT * FROM ${stats_db_name}.publication_datasources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_languages AS SELECT * FROM ${stats_db_name}.publication_languages;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_oids AS SELECT * FROM ${stats_db_name}.publication_oids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_pids AS SELECT * FROM ${stats_db_name}.publication_pids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_refereed AS SELECT * FROM ${stats_db_name}.publication_refereed;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_sources AS SELECT * FROM ${stats_db_name}.publication_sources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_topics AS SELECT * FROM ${stats_db_name}.publication_topics;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result AS SELECT * FROM ${stats_db_name}.result;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_affiliated_country AS SELECT * FROM ${stats_db_name}.result_affiliated_country;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_citations AS SELECT * FROM ${stats_db_name}.result_citations;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_classifications AS SELECT * FROM ${stats_db_name}.result_classifications;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_concepts AS SELECT * FROM ${stats_db_name}.result_concepts;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_datasources AS SELECT * FROM ${stats_db_name}.result_datasources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_deposited_country AS SELECT * FROM ${stats_db_name}.result_deposited_country;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_fundercount AS SELECT * FROM ${stats_db_name}.result_fundercount;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_gold AS SELECT * FROM ${stats_db_name}.result_gold;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_greenoa AS SELECT * FROM ${stats_db_name}.result_greenoa;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_languages AS SELECT * FROM ${stats_db_name}.result_languages;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_licenses AS SELECT * FROM ${stats_db_name}.result_licenses;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_oids AS SELECT * FROM ${stats_db_name}.result_oids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_organization AS SELECT * FROM ${stats_db_name}.result_organization;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_peerreviewed AS SELECT * FROM ${stats_db_name}.result_peerreviewed;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_pids AS SELECT * FROM ${stats_db_name}.result_pids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projectcount AS SELECT * FROM ${stats_db_name}.result_projectcount;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projects AS SELECT * FROM ${stats_db_name}.result_projects;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_refereed AS SELECT * FROM ${stats_db_name}.result_refereed;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_sources AS SELECT * FROM ${stats_db_name}.result_sources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_topics AS SELECT * FROM ${stats_db_name}.result_topics;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.rndexpediture AS SELECT * FROM ${stats_db_name}.rndexpediture;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.roarmap AS SELECT * FROM ${stats_db_name}.roarmap;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software AS SELECT * FROM ${stats_db_name}.software;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_citations AS SELECT * FROM ${stats_db_name}.software_citations;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_classifications AS SELECT * FROM ${stats_db_name}.software_classifications;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_concepts AS SELECT * FROM ${stats_db_name}.software_concepts;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_datasources AS SELECT * FROM ${stats_db_name}.software_datasources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_languages AS SELECT * FROM ${stats_db_name}.software_languages;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_licenses AS SELECT * FROM ${stats_db_name}.software_licenses;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_oids AS SELECT * FROM ${stats_db_name}.software_oids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_pids AS SELECT * FROM ${stats_db_name}.software_pids;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_refereed AS SELECT * FROM ${stats_db_name}.software_refereed;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_sources AS SELECT * FROM ${stats_db_name}.software_sources;
|
||||
CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_topics AS SELECT * FROM ${stats_db_name}.software_topics;
|
@ -1,8 +0,0 @@
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
-- Impala table statistics - Needed to make the tables
|
||||
-- visible for impala
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
|
||||
INVALIDATE METADATA ${stats_db_name};
|
@ -1,8 +0,0 @@
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
-- Impala table statistics - Needed to make the tables
|
||||
-- visible for impala
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
|
||||
INVALIDATE METADATA ${stats_db_name};
|
@ -0,0 +1,121 @@
|
||||
-- Start from an empty monitor database on every run.
-- The upper-case database names in this file look like placeholders that the
-- calling script fills in before execution (verify against the caller).
drop database if exists TARGET cascade;
create database if not exists TARGET;

-- Reference data, exposed unchanged as views.
create view if not exists TARGET.category
    as select * from SOURCE.category;
create view if not exists TARGET.concept
    as select * from SOURCE.concept;
create view if not exists TARGET.context
    as select * from SOURCE.context;
create view if not exists TARGET.country
    as select * from SOURCE.country;
create view if not exists TARGET.countrygdp
    as select * from SOURCE.countrygdp;
create view if not exists TARGET.creation_date
    as select * from SOURCE.creation_date;
create view if not exists TARGET.funder
    as select * from SOURCE.funder;
create view if not exists TARGET.fundref
    as select * from SOURCE.fundref;
-- NOTE(review): the view is spelled rndexpenditure while the table it reads
-- is spelled rndexpediture - confirm which name consumers query.
create view if not exists TARGET.rndexpenditure
    as select * from SOURCE.rndexpediture;
--create view if not exists TARGET.roarmap as select * from SOURCE.roarmap;
|
||||
|
||||
-- Results kept for the monitor: those linked to at least one project present
-- in the project table, plus those linked to at least one concept. A result
-- can satisfy both branches, hence the distinct over the combined rows.
create table TARGET.result as
select distinct *
from (
    select *
    from SOURCE.result res
    where exists (
        select 1
        from SOURCE.result_projects res_proj
        join SOURCE.project proj on res_proj.project = proj.id
        where res_proj.id = res.id
    )
    union all
    select *
    from SOURCE.result res
    where exists (
        select 1
        from SOURCE.result_concepts res_conc
        where res_conc.id = res.id
    )
) candidates;
compute stats TARGET.result;
|
||||
|
||||
-- Each table below copies the rows of its counterpart whose id belongs to a
-- result retained in the monitor result table, then gathers statistics.
create table TARGET.result_affiliated_country as
select *
from SOURCE.result_affiliated_country src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_affiliated_country;

create table TARGET.result_citations as
select *
from SOURCE.result_citations src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_citations;

create table TARGET.result_classifications as
select *
from SOURCE.result_classifications src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_classifications;

create table TARGET.result_concepts as
select *
from SOURCE.result_concepts src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_concepts;

create table TARGET.result_datasources as
select *
from SOURCE.result_datasources src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_datasources;

create table TARGET.result_deposited_country as
select *
from SOURCE.result_deposited_country src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_deposited_country;

create table TARGET.result_fundercount as
select *
from SOURCE.result_fundercount src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_fundercount;

create table TARGET.result_gold as
select *
from SOURCE.result_gold src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_gold;

create table TARGET.result_greenoa as
select *
from SOURCE.result_greenoa src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_greenoa;

create table TARGET.result_languages as
select *
from SOURCE.result_languages src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_languages;
|
||||
|
||||
-- Licence rows of the retained results. The table now carries the same name
-- as the table it is copied from, like the other result tables in this script.
create table TARGET.result_licenses as
select *
from SOURCE.result_licenses orig
where exists (select 1 from TARGET.result r where r.id = orig.id);
compute stats TARGET.result_licenses;

-- Keeps the previously used spelling resolvable for existing queries.
create view if not exists TARGET.result_licences
    as select * from TARGET.result_licenses;
|
||||
|
||||
-- Each table below copies the rows of its counterpart whose id belongs to a
-- result retained in the monitor result table, then gathers statistics.
create table TARGET.result_oids as
select *
from SOURCE.result_oids src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_oids;

create table TARGET.result_organization as
select *
from SOURCE.result_organization src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_organization;

create table TARGET.result_peerreviewed as
select *
from SOURCE.result_peerreviewed src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_peerreviewed;

create table TARGET.result_pids as
select *
from SOURCE.result_pids src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_pids;

create table TARGET.result_projectcount as
select *
from SOURCE.result_projectcount src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_projectcount;

create table TARGET.result_projects as
select *
from SOURCE.result_projects src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_projects;

create table TARGET.result_refereed as
select *
from SOURCE.result_refereed src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_refereed;

create table TARGET.result_sources as
select *
from SOURCE.result_sources src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_sources;

create table TARGET.result_topics as
select *
from SOURCE.result_topics src
where exists (select 1 from TARGET.result kept where kept.id = src.id);
compute stats TARGET.result_topics;
|
||||
|
||||
-- datasources
-- Datasource tables are exposed unchanged as views.
create view if not exists TARGET.datasource
    as select * from SOURCE.datasource;
create view if not exists TARGET.datasource_oids
    as select * from SOURCE.datasource_oids;
create view if not exists TARGET.datasource_organizations
    as select * from SOURCE.datasource_organizations;
create view if not exists TARGET.datasource_sources
    as select * from SOURCE.datasource_sources;

-- Same pairs as the monitor result_datasources table with the two columns
-- swapped: the datasource becomes id and the result id becomes result.
create table TARGET.datasource_results as
select
    id as result,
    datasource as id
from TARGET.result_datasources;
compute stats TARGET.datasource_results;
|
||||
|
||||
-- organizations
-- Organization tables are exposed unchanged as views.
create view if not exists TARGET.organization
    as select * from SOURCE.organization;
create view if not exists TARGET.organization_datasources
    as select * from SOURCE.organization_datasources;
create view if not exists TARGET.organization_pids
    as select * from SOURCE.organization_pids;
create view if not exists TARGET.organization_projects
    as select * from SOURCE.organization_projects;
create view if not exists TARGET.organization_sources
    as select * from SOURCE.organization_sources;
|
||||
|
||||
-- projects
-- Project tables are exposed unchanged as views.
create view if not exists TARGET.project
    as select * from SOURCE.project;
create view if not exists TARGET.project_oids
    as select * from SOURCE.project_oids;
create view if not exists TARGET.project_organizations
    as select * from SOURCE.project_organizations;
create view if not exists TARGET.project_resultcount
    as select * from SOURCE.project_resultcount;

-- Same pairs as the monitor result_projects table with the two columns
-- swapped: the project becomes id and the result id becomes result.
create table TARGET.project_results as
select
    id as result,
    project as id
from TARGET.result_projects;
compute stats TARGET.project_results;
|
||||
|
||||
--denorm
-- Replaces the monitor result table by a denormalised copy that carries
-- project and datasource attributes on every row. The original table is
-- parked under a temporary name while the copy is built and dropped afterwards.
-- NOTE(review): all four joins are inner joins, so a result lacking either a
-- project link or a datasource link is absent from the final table, while its
-- rows in the tables derived earlier in this script remain - confirm intended.
alter table TARGET.result rename to TARGET.res_tmp;

create table TARGET.result_denorm as
select distinct
    res.*,
    res_proj.project,
    proj.acronym as pacronym,
    proj.title as ptitle,
    proj.funder as pfunder,
    proj.funding_lvl0 as pfunding_lvl0,
    res_ds.datasource,
    ds.name as dname,
    ds.type as dtype
from TARGET.res_tmp res
join TARGET.result_projects res_proj on res_proj.id = res.id
join TARGET.result_datasources res_ds on res_ds.id = res.id
join TARGET.project proj on proj.id = res_proj.project
join TARGET.datasource ds on ds.id = res_ds.datasource;
compute stats TARGET.result_denorm;

alter table TARGET.result_denorm rename to TARGET.result;
drop table TARGET.res_tmp;
--- done!
|
@ -1,30 +1,84 @@
|
||||
-- noinspection SqlNoDataSourceInspectionForFile
|
||||
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
-- Project table/view and Project related tables/views
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
-- Project_oids Table
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_oids;
|
||||
CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids;
|
||||
|
||||
-- Project_organizations Table
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations;
|
||||
CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype='projectOrganization' and r.datainfo.deletedbyinference=false;
|
||||
-- project_oids: one row per (project, original id), produced by exploding
-- p.originalid. substr(p.id, 4) keeps the project id from its 4th character on.
CREATE TABLE ${stats_db_name}.project_oids AS
SELECT
    substr(p.id, 4) AS id,
    oids.ids        AS oid
FROM ${openaire_db_name}.project p
    LATERAL VIEW explode(p.originalid) oids AS ids;
|
||||
-- project_organizations: project -> organization links taken from relations of
-- type 'projectOrganization' that are not flagged deletedbyinference.
-- Relation source is stored as the project id, relation target as the
-- organization; both ids are kept from their 4th character on.
CREATE TABLE ${stats_db_name}.project_organizations AS
SELECT
    substr(r.source, 4) AS id,
    substr(r.target, 4) AS organization
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization'
  AND r.datainfo.deletedbyinference = false;
|
||||
|
||||
-- Project_results Table
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_results;
|
||||
CREATE TABLE ${stats_db_name}.project_results AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result FROM ${openaire_db_name}.relation r WHERE r.reltype='resultProject' and r.datainfo.deletedbyinference=false;
|
||||
-- project_results: project -> result links taken from relations of type
-- 'resultProject' that are not flagged deletedbyinference.
-- Note the direction: the relation TARGET is stored as the project id and the
-- relation SOURCE as the result; both ids are kept from their 4th character on.
CREATE TABLE ${stats_db_name}.project_results AS
SELECT
    substr(r.target, 4) AS id,
    substr(r.source, 4) AS result
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultProject'
  AND r.datainfo.deletedbyinference = false;
|
||||
|
||||
-- Project table
|
||||
----------------
|
||||
-- Creating and populating temporary Project table
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_tmp;
|
||||
CREATE TABLE ${stats_db_name}.project_tmp (id STRING, acronym STRING, title STRING, funder STRING, funding_lvl0 STRING, funding_lvl1 STRING, funding_lvl2 STRING, ec39 STRING, type STRING, startdate STRING, enddate STRING, start_year INT, end_year INT, duration INT, haspubs STRING, numpubs INT, daysforlastpub INT, delayedpubs INT, callidentifier STRING, code STRING) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true');
|
||||
-- Temporary project table: bucketed on id (100 buckets), stored as ORC and
-- declared transactional.
-- NOTE(review): presumably transactional so that later steps can update rows
-- in place (e.g. the publication counters) — confirm against the other steps.
CREATE TABLE ${stats_db_name}.project_tmp (
    -- identity and descriptive fields
    id             STRING,
    acronym        STRING,
    title          STRING,
    -- funding information
    funder         STRING,
    funding_lvl0   STRING,
    funding_lvl1   STRING,
    funding_lvl2   STRING,
    ec39           STRING,
    type           STRING,
    -- project time span
    startdate      STRING,
    enddate        STRING,
    start_year     INT,
    end_year       INT,
    duration       INT,
    -- publication indicators
    haspubs        STRING,
    numpubs        INT,
    daysforlastpub INT,
    delayedpubs    INT,
    -- call and grant code
    callidentifier STRING,
    code           STRING
)
CLUSTERED BY (id) INTO 100 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional' = 'true');
|
||||
|
||||
INSERT INTO ${stats_db_name}.project_tmp SELECT substr(p.id, 4) AS id, p.acronym.value AS acronym, p.title.value AS title, xpath_string(p.fundingtree[0].value, '//funder/name') AS funder, xpath_string(p.fundingtree[0].value, '//funding_level_0/name') AS funding_lvl0, xpath_string(p.fundingtree[0].value, '//funding_level_1/name') AS funding_lvl1, xpath_string(p.fundingtree[0].value, '//funding_level_2/name') AS funding_lvl2, p.ecsc39.value AS ec39, p.contracttype.classname AS type, p.startdate.value AS startdate, p.enddate.value AS enddate, year(p.startdate.value) AS start_year, year(p.enddate.value) AS end_year, CAST(MONTHS_BETWEEN(p.enddate.value, p.startdate.value) AS INT) AS duration, 'no' AS haspubs, 0 AS numpubs, 0 AS daysforlastpub, 0 AS delayedpubs, p.callidentifier.value AS callidentifier, p.code.value AS code FROM ${openaire_db_name}.project p WHERE p.datainfo.deletedbyinference=false;
|
||||
-- Populate project_tmp from the projects that are not flagged deletedbyinference.
-- There is no column list, so values are matched to the table columns by
-- position: keep the select list in the same order as the table definition.
INSERT INTO ${stats_db_name}.project_tmp
SELECT
    substr(p.id, 4)                                                 AS id,
    p.acronym.value                                                 AS acronym,
    p.title.value                                                   AS title,
    -- funding fields are read from the FIRST fundingtree entry only
    xpath_string(p.fundingtree[0].value, '//funder/name')           AS funder,
    xpath_string(p.fundingtree[0].value, '//funding_level_0/name')  AS funding_lvl0,
    xpath_string(p.fundingtree[0].value, '//funding_level_1/name')  AS funding_lvl1,
    xpath_string(p.fundingtree[0].value, '//funding_level_2/name')  AS funding_lvl2,
    p.ecsc39.value                                                  AS ec39,
    p.contracttype.classname                                        AS type,
    p.startdate.value                                               AS startdate,
    p.enddate.value                                                 AS enddate,
    year(p.startdate.value)                                         AS start_year,
    year(p.enddate.value)                                           AS end_year,
    -- duration in months, truncated to an integer
    CAST(MONTHS_BETWEEN(p.enddate.value, p.startdate.value) AS INT) AS duration,
    -- publication indicators are inserted as constants here
    'no'                                                            AS haspubs,
    0                                                               AS numpubs,
    0                                                               AS daysforlastpub,
    0                                                               AS delayedpubs,
    p.callidentifier.value                                          AS callidentifier,
    p.code.value                                                    AS code
FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false;
|
||||
|
||||
-- funder: one row per distinct (id, name, shortname) found in the funding
-- trees of the projects. Every entry of p.fundingtree is exploded here, not
-- only the first one.
-- The table is dropped first, like the other tables built in this script, so
-- that re-running the step does not fail on an already existing table.
DROP TABLE IF EXISTS ${stats_db_name}.funder;

create table ${stats_db_name}.funder as
select distinct xpath_string(fund, '//funder/id')        as id,
                xpath_string(fund, '//funder/name')      as name,
                xpath_string(fund, '//funder/shortname') as shortname
from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund;
|
||||
|
||||
-- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS FOR COLUMNS;
|
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env bash

# Ask the service whose base URL is given as the first argument to update its
# cache. The URL is double-quoted so that a value containing whitespace or glob
# characters is passed to curl as a single, unmodified argument.
curl --request GET "$1/cache/updateCache"

# Keep this step alive for 20 hours after issuing the request.
# NOTE(review): presumably to give the cache update time to complete before the
# workflow proceeds — confirm with the workflow definition.
sleep 20h
|
Loading…
Reference in New Issue