forked from D-Net/dnet-hadoop
Merge pull request 'master' (#111) from antonis.lempesis/dnet-hadoop:master into master
Reviewed-on: D-Net/dnet-hadoop#111
This commit is contained in:
commit
a4cfabdbc6
|
@ -16,6 +16,14 @@
|
||||||
<name>monitor_db_production_name</name>
|
<name>monitor_db_production_name</name>
|
||||||
<description>the name of the monitor public database</description>
|
<description>the name of the monitor public database</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>observatory_db_name</name>
|
||||||
|
<description>the monitor database name</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>observatory_db_production_name</name>
|
||||||
|
<description>the name of the monitor public database</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>stats_tool_api_url</name>
|
<name>stats_tool_api_url</name>
|
||||||
<description>The url of the API of the stats tool. Is used to trigger the cache promote.</description>
|
<description>The url of the API of the stats tool. Is used to trigger the cache promote.</description>
|
||||||
|
@ -77,6 +85,19 @@
|
||||||
<argument>${monitor_db_production_name}</argument>
|
<argument>${monitor_db_production_name}</argument>
|
||||||
<file>updateProductionViews.sh</file>
|
<file>updateProductionViews.sh</file>
|
||||||
</shell>
|
</shell>
|
||||||
|
<ok to="updateObservatoryViews"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="updateObservatoryViews">
|
||||||
|
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<exec>updateProductionViews.sh</exec>
|
||||||
|
<argument>${observatory_db_name}</argument>
|
||||||
|
<argument>${observatory_db_production_name}</argument>
|
||||||
|
<file>updateProductionViews.sh</file>
|
||||||
|
</shell>
|
||||||
<ok to="promoteCache"/>
|
<ok to="promoteCache"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
|
||||||
|
export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
|
||||||
|
if ! [ -L $link_folder ]
|
||||||
|
then
|
||||||
|
rm -Rf "$link_folder"
|
||||||
|
ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
|
||||||
|
fi
|
||||||
|
|
||||||
|
export SOURCE=$1
|
||||||
|
export TARGET=$2
|
||||||
|
export SHADOW=$3
|
||||||
|
export SCRIPT_PATH=$4
|
||||||
|
|
||||||
|
echo "Getting file from " $4
|
||||||
|
hdfs dfs -copyToLocal $4
|
||||||
|
|
||||||
|
echo "Creating observatory database"
|
||||||
|
impala-shell -q "drop database if exists ${TARGET} cascade"
|
||||||
|
impala-shell -q "create database if not exists ${TARGET}"
|
||||||
|
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
|
||||||
|
cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f -
|
||||||
|
echo "Impala shell finished"
|
||||||
|
|
||||||
|
echo "Updating shadow observatory database"
|
||||||
|
impala-shell -q "create database if not exists ${SHADOW}"
|
||||||
|
impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f -
|
||||||
|
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f -
|
||||||
|
echo "Shadow db ready!"
|
|
@ -45,35 +45,3 @@ FROM ${stats_db_name}.dataset
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *, bestlicence AS access_mode
|
SELECT *, bestlicence AS access_mode
|
||||||
FROM ${stats_db_name}.otherresearchproduct;
|
FROM ${stats_db_name}.otherresearchproduct;
|
||||||
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
-- To see with Antonis if the following is needed and where it should be placed
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
CREATE TABLE ${stats_db_name}.numbers_country AS
|
|
||||||
SELECT org.country AS country, count(distinct rd.datasource) AS datasources, count(distinct r.id) AS publications
|
|
||||||
FROM ${stats_db_name}.result r,
|
|
||||||
${stats_db_name}.result_datasources rd,
|
|
||||||
${stats_db_name}.datasource d,
|
|
||||||
${stats_db_name}.datasource_organizations dor,
|
|
||||||
${stats_db_name}.organization org
|
|
||||||
WHERE r.id = rd.id
|
|
||||||
AND rd.datasource = d.id
|
|
||||||
AND d.id = dor.id
|
|
||||||
AND dor.organization = org.id
|
|
||||||
AND r.type = 'publication'
|
|
||||||
AND r.bestlicence = 'Open Access'
|
|
||||||
GROUP BY org.country;
|
|
||||||
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.publication COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.publication COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.dataset COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.dataset COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.software COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.software COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.numbers_country COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.numbers_country COMPUTE STATISTICS FOR COLUMNS;
|
|
|
@ -59,33 +59,4 @@ from result_gold
|
||||||
union all
|
union all
|
||||||
select distinct r.id, false as gold
|
select distinct r.id, false as gold
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
where r.id not in (select id from result_gold);
|
where r.id not in (select id from result_gold);
|
||||||
|
|
||||||
-- shortcut result-country through the organization affiliation
|
|
||||||
create table ${stats_db_name}.result_affiliated_country as
|
|
||||||
select r.id as id, o.country as country
|
|
||||||
from ${stats_db_name}.result r
|
|
||||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
|
||||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
|
||||||
where o.country is not null and o.country!='';
|
|
||||||
|
|
||||||
-- shortcut result-country through datasource of deposition
|
|
||||||
create table ${stats_db_name}.result_deposited_country as
|
|
||||||
select r.id as id, o.country as country
|
|
||||||
from ${stats_db_name}.result r
|
|
||||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
|
||||||
join ${stats_db_name}.datasource d on d.id=rd.datasource
|
|
||||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
|
||||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
|
||||||
where o.country is not null and o.country!='';
|
|
||||||
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_peerreviewed COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_peerreviewed COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_greenoa COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_greenoa COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_gold COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_gold COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_affiliated_country COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_affiliated_country COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_deposited_country COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_deposited_country COMPUTE STATISTICS FOR COLUMNS;
|
|
|
@ -52,7 +52,4 @@ LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
|
||||||
drop table if exists ${stats_db_name}.result;
|
drop table if exists ${stats_db_name}.result;
|
||||||
drop view if exists ${stats_db_name}.result;
|
drop view if exists ${stats_db_name}.result;
|
||||||
create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp;
|
create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp;
|
||||||
drop table ${stats_db_name}.result_tmp;
|
drop table ${stats_db_name}.result_tmp;
|
||||||
--
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result COMPUTE STATISTICS FOR COLUMNS;
|
|
|
@ -19,9 +19,6 @@ create table TARGET.result as
|
||||||
select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) ) foo;
|
select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) ) foo;
|
||||||
compute stats TARGET.result;
|
compute stats TARGET.result;
|
||||||
|
|
||||||
create table TARGET.result_affiliated_country as select * from SOURCE.result_affiliated_country rac where exists (select 1 from TARGET.result r where r.id=rac.id);
|
|
||||||
compute stats TARGET.result_affiliated_country;
|
|
||||||
|
|
||||||
create table TARGET.result_citations as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
create table TARGET.result_citations as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
compute stats TARGET.result_citations;
|
compute stats TARGET.result_citations;
|
||||||
|
|
||||||
|
@ -34,9 +31,6 @@ compute stats TARGET.result_concepts;
|
||||||
create table TARGET.result_datasources as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
create table TARGET.result_datasources as select * from SOURCE.result_datasources orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
compute stats TARGET.result_datasources;
|
compute stats TARGET.result_datasources;
|
||||||
|
|
||||||
create table TARGET.result_deposited_country as select * from SOURCE.result_deposited_country orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
|
||||||
compute stats TARGET.result_deposited_country;
|
|
||||||
|
|
||||||
create table TARGET.result_fundercount as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
create table TARGET.result_fundercount as select * from SOURCE.result_fundercount orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
compute stats TARGET.result_fundercount;
|
compute stats TARGET.result_fundercount;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,259 @@
|
||||||
|
create table TARGET.result_affiliated_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_affiliated_year stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year;
|
||||||
|
|
||||||
|
create table TARGET.result_affiliated_year_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_affiliated_datasource stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, d.name as dname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
left outer join SOURCE.datasource d on d.id=rd.datasource
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name;
|
||||||
|
|
||||||
|
create table TARGET.result_affiliated_datasource_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
left outer join SOURCE.datasource d on d.id=rd.datasource
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_affiliated_organization stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, o.name as oname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name;
|
||||||
|
|
||||||
|
create table TARGET.result_affiliated_organization_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_affiliated_funder stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, p.funder as pfunder
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
join SOURCE.result_projects rp on rp.id=r.id
|
||||||
|
join SOURCE.project p on p.id=rp.project
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder;
|
||||||
|
|
||||||
|
create table TARGET.result_affiliated_funder_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_organization ro on ro.id=r.id
|
||||||
|
join SOURCE.organization o on o.id=ro.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
join SOURCE.result_projects rp on rp.id=r.id
|
||||||
|
join SOURCE.project p on p.id=rp.project
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_year stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_year_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_datasource stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, d.name as dname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_datasource_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_organization stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, o.name as oname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_organization_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_funder stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, p.funder as pfunder
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
join SOURCE.result_projects rp on rp.id=r.id
|
||||||
|
join SOURCE.project p on p.id=rp.project
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder;
|
||||||
|
|
||||||
|
create table TARGET.result_deposited_funder_country stored as parquet as
|
||||||
|
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||||
|
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||||
|
r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
|
||||||
|
from SOURCE.result r
|
||||||
|
join SOURCE.result_datasources rd on rd.id=r.id
|
||||||
|
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||||
|
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||||
|
join SOURCE.organization o on o.id=dor.organization
|
||||||
|
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||||
|
join SOURCE.result_projects rp on rp.id=r.id
|
||||||
|
join SOURCE.project p on p.id=rp.project
|
||||||
|
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||||
|
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||||
|
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name;
|
||||||
|
|
||||||
|
compute stats TARGET.result_affiliated_country;
|
||||||
|
compute stats TARGET.result_affiliated_year;
|
||||||
|
compute stats TARGET.result_affiliated_year_country;
|
||||||
|
compute stats TARGET.result_affiliated_datasource;
|
||||||
|
compute stats TARGET.result_affiliated_datasource_country;
|
||||||
|
compute stats TARGET.result_affiliated_organization;
|
||||||
|
compute stats TARGET.result_affiliated_organization_country;
|
||||||
|
compute stats TARGET.result_affiliated_funder;
|
||||||
|
compute stats TARGET.result_affiliated_funder_country;
|
||||||
|
compute stats TARGET.result_deposited_country;
|
||||||
|
compute stats TARGET.result_deposited_year;
|
||||||
|
compute stats TARGET.result_deposited_year_country;
|
||||||
|
compute stats TARGET.result_deposited_datasource;
|
||||||
|
compute stats TARGET.result_deposited_datasource_country;
|
||||||
|
compute stats TARGET.result_deposited_organization;
|
||||||
|
compute stats TARGET.result_deposited_organization_country;
|
||||||
|
compute stats TARGET.result_deposited_funder;
|
||||||
|
compute stats TARGET.result_deposited_funder_country;
|
|
@ -25,6 +25,14 @@
|
||||||
<name>monitor_db_shadow_name</name>
|
<name>monitor_db_shadow_name</name>
|
||||||
<description>the name of the shadow monitor db</description>
|
<description>the name of the shadow monitor db</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>observatory_db_name</name>
|
||||||
|
<description>the target monitor db name</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>observatory_db_shadow_name</name>
|
||||||
|
<description>the name of the shadow monitor db</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>stats_tool_api_url</name>
|
<name>stats_tool_api_url</name>
|
||||||
<description>The url of the API of the stats tool. Is used to trigger the cache update.</description>
|
<description>The url of the API of the stats tool. Is used to trigger the cache update.</description>
|
||||||
|
@ -305,11 +313,26 @@
|
||||||
<argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
|
<argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
|
||||||
<file>monitor.sh</file>
|
<file>monitor.sh</file>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="Step21"/>
|
<ok to="step21-createObservatoryDB"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step21">
|
<action name="step21-createObservatoryDB">
|
||||||
|
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<exec>observatory.sh</exec>
|
||||||
|
<argument>${stats_db_name}</argument>
|
||||||
|
<argument>${observatory_db_name}</argument>
|
||||||
|
<argument>${observatory_db_shadow_name}</argument>
|
||||||
|
<argument>${wf:appPath()}/scripts/step21-createObservatoryDB.sql</argument>
|
||||||
|
<file>observatory.sh</file>
|
||||||
|
</shell>
|
||||||
|
<ok to="Step22"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="Step22">
|
||||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
<name-node>${nameNode}</name-node>
|
<name-node>${nameNode}</name-node>
|
||||||
|
@ -322,4 +345,4 @@
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
</workflow-app>
|
</workflow-app>
|
Loading…
Reference in New Issue