2020-07-02 01:42:30 +02:00
|
|
|
-- Replace the creation of the result view so that it includes the boolean fields
-- from the previous tables (green, gold, peer reviewed).
|
|
|
|
-- Remove any staging table left over from an earlier run before recreating it.
DROP TABLE IF EXISTS ${stats_db_name}.result_tmp;

-- Staging table filled by the INSERT statements that follow in this script.
-- Besides the descriptive result columns it carries the three boolean flags
-- peer_reviewed, green and gold. It is bucketed on id (100 buckets), stored
-- as ORC and declared transactional.
CREATE TABLE ${stats_db_name}.result_tmp (
    id               STRING,
    title            STRING,
    publisher        STRING,
    journal          STRING,
    `date`           STRING,
    `year`           INT,
    bestlicence      STRING,
    access_mode      STRING,
    embargo_end_date STRING,
    delayed          BOOLEAN,
    authors          INT,
    source           STRING,
    abstract         BOOLEAN,
    type             STRING,
    peer_reviewed    BOOLEAN,
    green            BOOLEAN,
    gold             BOOLEAN
)
CLUSTERED BY (id) INTO 100 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');
|
|
|
|
|
|
|
|
-- Load the rows of the publication table into result_tmp together with their
-- peer_reviewed, green and gold flags. The LEFT JOINs keep publications that
-- have no matching row in a flag table; the corresponding flag is then NULL.
-- Selected values are assigned to result_tmp columns by position; the aliases
-- below are labels only. The `year` value is derived from `date` with
-- date_format(..., 'yyyy').
-- NOTE(review): access_mode receives bestlicence, i.e. the same value as the
-- bestlicence column -- confirm that this duplication is intended.
INSERT INTO ${stats_db_name}.result_tmp
SELECT
    r.id,
    r.title,
    r.publisher,
    r.journal,
    r.`date`,
    date_format(r.`date`, 'yyyy') AS `year`,
    r.bestlicence,
    r.bestlicence AS access_mode,
    r.embargo_end_date,
    r.delayed,
    r.authors,
    r.source,
    r.abstract,
    r.type,
    pr.peer_reviewed,
    goa.green,
    gld.gold
FROM ${stats_db_name}.publication AS r
LEFT JOIN ${stats_db_name}.result_peerreviewed AS pr
    ON pr.id = r.id
LEFT JOIN ${stats_db_name}.result_greenoa AS goa
    ON goa.id = r.id
LEFT JOIN ${stats_db_name}.result_gold AS gld
    ON gld.id = r.id;
|
|
|
|
|
|
|
|
-- Load the rows of the dataset table into result_tmp together with their
-- peer_reviewed, green and gold flags. The LEFT JOINs keep datasets that have
-- no matching row in a flag table; the corresponding flag is then NULL.
-- Selected values are assigned to result_tmp columns by position; the aliases
-- below are labels only. The `year` value is derived from `date` with
-- date_format(..., 'yyyy').
-- NOTE(review): access_mode receives bestlicence, i.e. the same value as the
-- bestlicence column -- confirm that this duplication is intended.
INSERT INTO ${stats_db_name}.result_tmp
SELECT
    r.id,
    r.title,
    r.publisher,
    r.journal,
    r.`date`,
    date_format(r.`date`, 'yyyy') AS `year`,
    r.bestlicence,
    r.bestlicence AS access_mode,
    r.embargo_end_date,
    r.delayed,
    r.authors,
    r.source,
    r.abstract,
    r.type,
    pr.peer_reviewed,
    goa.green,
    gld.gold
FROM ${stats_db_name}.dataset AS r
LEFT JOIN ${stats_db_name}.result_peerreviewed AS pr
    ON pr.id = r.id
LEFT JOIN ${stats_db_name}.result_greenoa AS goa
    ON goa.id = r.id
LEFT JOIN ${stats_db_name}.result_gold AS gld
    ON gld.id = r.id;
|
|
|
|
|
|
|
|
-- Load the rows of the software table into result_tmp together with their
-- peer_reviewed, green and gold flags. The LEFT JOINs keep software rows that
-- have no matching row in a flag table; the corresponding flag is then NULL.
-- Selected values are assigned to result_tmp columns by position; the aliases
-- below are labels only. The `year` value is derived from `date` with
-- date_format(..., 'yyyy').
-- NOTE(review): access_mode receives bestlicence, i.e. the same value as the
-- bestlicence column -- confirm that this duplication is intended.
INSERT INTO ${stats_db_name}.result_tmp
SELECT
    r.id,
    r.title,
    r.publisher,
    r.journal,
    r.`date`,
    date_format(r.`date`, 'yyyy') AS `year`,
    r.bestlicence,
    r.bestlicence AS access_mode,
    r.embargo_end_date,
    r.delayed,
    r.authors,
    r.source,
    r.abstract,
    r.type,
    pr.peer_reviewed,
    goa.green,
    gld.gold
FROM ${stats_db_name}.software AS r
LEFT JOIN ${stats_db_name}.result_peerreviewed AS pr
    ON pr.id = r.id
LEFT JOIN ${stats_db_name}.result_greenoa AS goa
    ON goa.id = r.id
LEFT JOIN ${stats_db_name}.result_gold AS gld
    ON gld.id = r.id;
|
|
|
|
|
|
|
|
-- Load the rows of the otherresearchproduct table into result_tmp together
-- with their peer_reviewed, green and gold flags. The LEFT JOINs keep rows that
-- have no matching row in a flag table; the corresponding flag is then NULL.
-- Selected values are assigned to result_tmp columns by position; the aliases
-- below are labels only. The `year` value is derived from `date` with
-- date_format(..., 'yyyy').
-- NOTE(review): access_mode receives bestlicence, i.e. the same value as the
-- bestlicence column -- confirm that this duplication is intended.
INSERT INTO ${stats_db_name}.result_tmp
SELECT
    r.id,
    r.title,
    r.publisher,
    r.journal,
    r.`date`,
    date_format(r.`date`, 'yyyy') AS `year`,
    r.bestlicence,
    r.bestlicence AS access_mode,
    r.embargo_end_date,
    r.delayed,
    r.authors,
    r.source,
    r.abstract,
    r.type,
    pr.peer_reviewed,
    goa.green,
    gld.gold
FROM ${stats_db_name}.otherresearchproduct AS r
LEFT JOIN ${stats_db_name}.result_peerreviewed AS pr
    ON pr.id = r.id
LEFT JOIN ${stats_db_name}.result_greenoa AS goa
    ON goa.id = r.id
LEFT JOIN ${stats_db_name}.result_gold AS gld
    ON gld.id = r.id;
|
|
|
|
|
|
|
|
-- Publish the final result table.
-- First remove any existing object named result, whether it was created as a
-- table or as a view.
DROP TABLE IF EXISTS ${stats_db_name}.result;
DROP VIEW IF EXISTS ${stats_db_name}.result;

-- Materialise the staging data as a Parquet table. SELECT * is used so the
-- new table takes over exactly the columns of result_tmp.
CREATE TABLE ${stats_db_name}.result
STORED AS PARQUET
AS
SELECT *
FROM ${stats_db_name}.result_tmp;

-- The staging table is no longer needed once result has been created.
DROP TABLE ${stats_db_name}.result_tmp;
|