Update "dhp-stats-update" workflow to use "spark"-actions, instead of "hive" ones.
Note: Currently, the code is set to test only "Step1".
This commit is contained in:
parent
d7da4f814b
commit
db33f7727c
|
@ -0,0 +1,18 @@
|
||||||
|
# Install the whole "dnet-hadoop" project.
|
||||||
|
|
||||||
|
# Delete this module's previous build-files in order to avoid any conflicts.
|
||||||
|
rm -rf target/ ||
|
||||||
|
|
||||||
|
# Go to the root directory of this project.
|
||||||
|
cd ../../
|
||||||
|
|
||||||
|
# Select the build profile.
|
||||||
|
DEFAULT_PROFILE='' # It's the empty profile.
|
||||||
|
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
|
||||||
|
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
|
||||||
|
|
||||||
|
# Install the project.
|
||||||
|
mvn clean install -U ${CHOSEN_MAVEN_PROFILE} -Dmaven.test.skip=true
|
||||||
|
|
||||||
|
# We skip tests for all modules, since they take a large amount of time and some of them fail.
|
||||||
|
# Any test added to this module will be executed in the "runOozieWorkflow.sh" script.
|
|
@ -0,0 +1,20 @@
|
||||||
|
# This script deploys and runs the oozie workflow on the cluster defined in the "~/.dhp/application.properties" file.
|
||||||
|
|
||||||
|
# Select the build profile.
|
||||||
|
DEFAULT_PROFILE='' # It's the empty profile.
|
||||||
|
NEWER_VERSIONS_PROFILE='-Pscala-2.12'
|
||||||
|
CHOSEN_MAVEN_PROFILE=${DEFAULT_PROFILE}
|
||||||
|
|
||||||
|
# Build and deploy this module.
|
||||||
|
mvn clean package -U ${CHOSEN_MAVEN_PROFILE} -Poozie-package,deploy,run \
|
||||||
|
-Dworkflow.source.dir=eu/dnetlib/dhp/oa/graph/stats
|
||||||
|
|
||||||
|
# Show the Oozie-job-ID.
|
||||||
|
echo -e "\n\nShowing the contents of \"extract-and-run-on-remote-host.log\":\n"
|
||||||
|
cat ./target/extract-and-run-on-remote-host.log
|
||||||
|
|
||||||
|
# Check oozie workflow status
|
||||||
|
# oozie job -oozie http://iis-cdh5-test-m3:11000/oozie -info <workflow-ID>
|
||||||
|
|
||||||
|
# Get the <job-ID> from the previous output and check the logs:
|
||||||
|
# yarn logs -applicationId <job-ID>
|
|
@ -4,5 +4,5 @@
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
DROP database IF EXISTS ${stats_db_name} CASCADE;
|
DROP database IF EXISTS ${stats_db_name} CASCADE; /*EOS*/
|
||||||
CREATE database ${stats_db_name};
|
CREATE database ${stats_db_name}; /*EOS*/
|
||||||
|
|
|
@ -5,27 +5,27 @@
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${external_stats_db_name}.fundref;
|
FROM ${external_stats_db_name}.fundref; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.country AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.country AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${external_stats_db_name}.country;
|
FROM ${external_stats_db_name}.country; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${external_stats_db_name}.countrygdp;
|
FROM ${external_stats_db_name}.countrygdp; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${external_stats_db_name}.roarmap;
|
FROM ${external_stats_db_name}.roarmap; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${external_stats_db_name}.rndexpediture;
|
FROM ${external_stats_db_name}.rndexpediture; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${external_stats_db_name}.licenses_normalized;
|
FROM ${external_stats_db_name}.licenses_normalized; /*EOS*/
|
||||||
|
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
|
@ -33,23 +33,23 @@ FROM ${external_stats_db_name}.licenses_normalized;
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
create or replace view ${stats_db_name}.usage_stats as
|
create or replace view ${stats_db_name}.usage_stats as
|
||||||
select * from openaire_prod_usage_stats.usage_stats;
|
select * from openaire_prod_usage_stats.usage_stats; /*EOS*/
|
||||||
|
|
||||||
create or replace view ${stats_db_name}.downloads_stats as
|
create or replace view ${stats_db_name}.downloads_stats as
|
||||||
select * from openaire_prod_usage_stats.downloads_stats;
|
select * from openaire_prod_usage_stats.downloads_stats; /*EOS*/
|
||||||
|
|
||||||
create or replace view ${stats_db_name}.pageviews_stats as
|
create or replace view ${stats_db_name}.pageviews_stats as
|
||||||
select * from openaire_prod_usage_stats.pageviews_stats;
|
select * from openaire_prod_usage_stats.pageviews_stats; /*EOS*/
|
||||||
|
|
||||||
create or replace view ${stats_db_name}.views_stats as
|
create or replace view ${stats_db_name}.views_stats as
|
||||||
select * from openaire_prod_usage_stats.views_stats;
|
select * from openaire_prod_usage_stats.views_stats; /*EOS*/
|
||||||
|
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
-- Creation date of the database
|
-- Creation date of the database
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.creation_date purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.creation_date purge; /*EOS*/
|
||||||
|
|
||||||
create table ${stats_db_name}.creation_date STORED AS PARQUET as
|
create table ${stats_db_name}.creation_date STORED AS PARQUET as
|
||||||
select date_format(current_date(), 'dd-MM-yyyy') as date;
|
select date_format(current_date(), 'dd-MM-yyyy') as date; /*EOS*/
|
||||||
|
|
|
@ -10,7 +10,7 @@ SET harvested='true'
|
||||||
WHERE datasource_tmp.id IN (SELECT DISTINCT d.id
|
WHERE datasource_tmp.id IN (SELECT DISTINCT d.id
|
||||||
FROM ${stats_db_name}.datasource_tmp d,
|
FROM ${stats_db_name}.datasource_tmp d,
|
||||||
${stats_db_name}.result_datasources rd
|
${stats_db_name}.result_datasources rd
|
||||||
WHERE d.id = rd.datasource);
|
WHERE d.id = rd.datasource); /*EOS*/
|
||||||
|
|
||||||
-- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables
|
-- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables
|
||||||
UPDATE ${stats_db_name}.project_tmp
|
UPDATE ${stats_db_name}.project_tmp
|
||||||
|
@ -19,8 +19,8 @@ WHERE project_tmp.id IN (SELECT pr.id
|
||||||
FROM ${stats_db_name}.project_results pr,
|
FROM ${stats_db_name}.project_results pr,
|
||||||
${stats_db_name}.result r
|
${stats_db_name}.result r
|
||||||
WHERE pr.result = r.id
|
WHERE pr.result = r.id
|
||||||
AND r.type = 'publication');
|
AND r.type = 'publication'); /*EOS*/
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.stored purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.stored purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.project stored as parquet as
|
CREATE TABLE ${stats_db_name}.project stored as parquet as
|
||||||
SELECT p.id,
|
SELECT p.id,
|
||||||
|
@ -63,7 +63,7 @@ FROM ${stats_db_name}.project_tmp p
|
||||||
AND r.type = 'publication'
|
AND r.type = 'publication'
|
||||||
AND datediff(to_date(r.date), to_date(pp.enddate)) > 0
|
AND datediff(to_date(r.date), to_date(pp.enddate)) > 0
|
||||||
GROUP BY pp.id) AS prr2
|
GROUP BY pp.id) AS prr2
|
||||||
ON prr2.id = p.id;
|
ON prr2.id = p.id; /*EOS*/
|
||||||
|
|
||||||
UPDATE ${stats_db_name}.publication_tmp
|
UPDATE ${stats_db_name}.publication_tmp
|
||||||
SET delayed = 'yes'
|
SET delayed = 'yes'
|
||||||
|
@ -73,7 +73,7 @@ WHERE publication_tmp.id IN (SELECT distinct r.id
|
||||||
${stats_db_name}.project_tmp p
|
${stats_db_name}.project_tmp p
|
||||||
WHERE r.id = pr.result
|
WHERE r.id = pr.result
|
||||||
AND pr.id = p.id
|
AND pr.id = p.id
|
||||||
AND to_date(r.date) - to_date(p.enddate) > 0);
|
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/
|
||||||
|
|
||||||
UPDATE ${stats_db_name}.dataset_tmp
|
UPDATE ${stats_db_name}.dataset_tmp
|
||||||
SET delayed = 'yes'
|
SET delayed = 'yes'
|
||||||
|
@ -83,7 +83,7 @@ WHERE dataset_tmp.id IN (SELECT distinct r.id
|
||||||
${stats_db_name}.project_tmp p
|
${stats_db_name}.project_tmp p
|
||||||
WHERE r.id = pr.result
|
WHERE r.id = pr.result
|
||||||
AND pr.id = p.id
|
AND pr.id = p.id
|
||||||
AND to_date(r.date) - to_date(p.enddate) > 0);
|
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/
|
||||||
|
|
||||||
UPDATE ${stats_db_name}.software_tmp
|
UPDATE ${stats_db_name}.software_tmp
|
||||||
SET delayed = 'yes'
|
SET delayed = 'yes'
|
||||||
|
@ -93,7 +93,7 @@ WHERE software_tmp.id IN (SELECT distinct r.id
|
||||||
${stats_db_name}.project_tmp p
|
${stats_db_name}.project_tmp p
|
||||||
WHERE r.id = pr.result
|
WHERE r.id = pr.result
|
||||||
AND pr.id = p.id
|
AND pr.id = p.id
|
||||||
AND to_date(r.date) - to_date(p.enddate) > 0);
|
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/
|
||||||
|
|
||||||
UPDATE ${stats_db_name}.otherresearchproduct_tmp
|
UPDATE ${stats_db_name}.otherresearchproduct_tmp
|
||||||
SET delayed = 'yes'
|
SET delayed = 'yes'
|
||||||
|
@ -103,7 +103,7 @@ WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
|
||||||
${stats_db_name}.project_tmp p
|
${stats_db_name}.project_tmp p
|
||||||
WHERE r.id = pr.result
|
WHERE r.id = pr.result
|
||||||
AND pr.id = p.id
|
AND pr.id = p.id
|
||||||
AND to_date(r.date) - to_date(p.enddate) > 0);
|
AND to_date(r.date) - to_date(p.enddate) > 0); /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
|
||||||
SELECT result_projects.id AS result,
|
SELECT result_projects.id AS result,
|
||||||
|
@ -116,4 +116,4 @@ FROM ${stats_db_name}.result_projects,
|
||||||
${stats_db_name}.project
|
${stats_db_name}.project
|
||||||
WHERE result_projects.id = result.id
|
WHERE result_projects.id = result.id
|
||||||
AND result.type = 'publication'
|
AND result.type = 'publication'
|
||||||
AND project.id = result_projects.project;
|
AND project.id = result_projects.project; /*EOS*/
|
|
@ -1,42 +1,42 @@
|
||||||
------------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------------
|
||||||
-- Creating parquet tables from the updated temporary tables and removing unnecessary temporary tables
|
-- Creating parquet tables from the updated temporary tables and removing unnecessary temporary tables
|
||||||
------------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------------
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.datasource purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.datasource stored AS parquet AS
|
CREATE TABLE ${stats_db_name}.datasource stored AS parquet AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.datasource_tmp;
|
FROM ${stats_db_name}.datasource_tmp; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.publication purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication stored AS parquet AS
|
CREATE TABLE ${stats_db_name}.publication stored AS parquet AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.publication_tmp;
|
FROM ${stats_db_name}.publication_tmp; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset stored AS parquet AS
|
CREATE TABLE ${stats_db_name}.dataset stored AS parquet AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.dataset_tmp;
|
FROM ${stats_db_name}.dataset_tmp; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.software purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software stored AS parquet AS
|
CREATE TABLE ${stats_db_name}.software stored AS parquet AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.software_tmp;
|
FROM ${stats_db_name}.software_tmp; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct stored AS parquet AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct stored AS parquet AS
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_tmp;
|
FROM ${stats_db_name}.otherresearchproduct_tmp; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE ${stats_db_name}.project_tmp;
|
DROP TABLE ${stats_db_name}.project_tmp; /*EOS*/
|
||||||
DROP TABLE ${stats_db_name}.datasource_tmp;
|
DROP TABLE ${stats_db_name}.datasource_tmp; /*EOS*/
|
||||||
DROP TABLE ${stats_db_name}.publication_tmp;
|
DROP TABLE ${stats_db_name}.publication_tmp; /*EOS*/
|
||||||
DROP TABLE ${stats_db_name}.dataset_tmp;
|
DROP TABLE ${stats_db_name}.dataset_tmp; /*EOS*/
|
||||||
DROP TABLE ${stats_db_name}.software_tmp;
|
DROP TABLE ${stats_db_name}.software_tmp; /*EOS*/
|
||||||
DROP TABLE ${stats_db_name}.otherresearchproduct_tmp;
|
DROP TABLE ${stats_db_name}.otherresearchproduct_tmp; /*EOS*/
|
||||||
|
|
||||||
----------------------------------------------
|
----------------------------------------------
|
||||||
-- Re-creating views from final parquet tables
|
-- Re-creating views from final parquet tables
|
||||||
|
@ -54,4 +54,4 @@ SELECT *, bestlicence AS access_mode
|
||||||
FROM ${stats_db_name}.dataset
|
FROM ${stats_db_name}.dataset
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *, bestlicence AS access_mode
|
SELECT *, bestlicence AS access_mode
|
||||||
FROM ${stats_db_name}.otherresearchproduct;
|
FROM ${stats_db_name}.otherresearchproduct; /*EOS*/
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
-- Sources related tables/views
|
-- Sources related tables/views
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_sources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_sources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
|
@ -16,9 +16,9 @@ LEFT OUTER JOIN
|
||||||
(
|
(
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_sources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_sources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
|
@ -29,9 +29,9 @@ LEFT OUTER JOIN
|
||||||
(
|
(
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.software_sources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_sources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
|
@ -42,9 +42,9 @@ LEFT OUTER JOIN
|
||||||
(
|
(
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_sources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_sources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
|
@ -55,7 +55,7 @@ LEFT OUTER JOIN
|
||||||
(
|
(
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS
|
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS
|
||||||
SELECT * FROM ${stats_db_name}.publication_sources
|
SELECT * FROM ${stats_db_name}.publication_sources
|
||||||
|
@ -64,9 +64,9 @@ SELECT * FROM ${stats_db_name}.dataset_sources
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.software_sources
|
SELECT * FROM ${stats_db_name}.software_sources
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_orcid purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_orcid purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as
|
||||||
select distinct res.id, upper(regexp_replace(res.orcid, 'http://orcid.org/' ,'')) as orcid
|
select distinct res.id, upper(regexp_replace(res.orcid, 'http://orcid.org/' ,'')) as orcid
|
||||||
|
@ -76,9 +76,9 @@ from (
|
||||||
LATERAL VIEW explode(author) a as auth
|
LATERAL VIEW explode(author) a as auth
|
||||||
LATERAL VIEW explode(auth.pid) ap as auth_pid
|
LATERAL VIEW explode(auth.pid) ap as auth_pid
|
||||||
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
|
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
|
||||||
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res;
|
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_result purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_result purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
|
||||||
select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype
|
select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype
|
||||||
|
@ -91,9 +91,9 @@ where reltype='resultResult'
|
||||||
and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
|
and r2.datainfo.deletedbyinference=false and r2.datainfo.invisible = FALSE
|
||||||
and r1.resulttype.classname != 'other'
|
and r1.resulttype.classname != 'other'
|
||||||
and r2.resulttype.classname != 'other'
|
and r2.resulttype.classname != 'other'
|
||||||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
|
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_citations_oc purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_citations_oc purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
|
||||||
select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
|
select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
|
||||||
|
@ -108,9 +108,9 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
|
||||||
and r1.resulttype.classname != 'other'
|
and r1.resulttype.classname != 'other'
|
||||||
and r2.resulttype.classname != 'other'
|
and r2.resulttype.classname != 'other'
|
||||||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
|
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
|
||||||
group by substr(target, 4);
|
group by substr(target, 4); /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_references_oc purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_references_oc purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
|
||||||
select substr(source, 4) as id, count(distinct substr(target, 4)) as references
|
select substr(source, 4) as id, count(distinct substr(target, 4)) as references
|
||||||
|
@ -125,4 +125,4 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
|
||||||
and r1.resulttype.classname != 'other'
|
and r1.resulttype.classname != 'other'
|
||||||
and r2.resulttype.classname != 'other'
|
and r2.resulttype.classname != 'other'
|
||||||
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
|
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
|
||||||
group by substr(source, 4);
|
group by substr(source, 4); /*EOS*/
|
|
@ -5,33 +5,33 @@
|
||||||
-- Licences related tables/views
|
-- Licences related tables/views
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_licenses purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_licenses purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses STORED AS PARQUET AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||||
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
|
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_licenses purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_licenses purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||||
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
|
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.software_licenses purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_licenses purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||||
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
|
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_licenses purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_licenses purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||||
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
|
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/
|
||||||
|
|
||||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
|
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
|
||||||
SELECT * FROM ${stats_db_name}.publication_licenses
|
SELECT * FROM ${stats_db_name}.publication_licenses
|
||||||
|
@ -40,15 +40,15 @@ SELECT * FROM ${stats_db_name}.dataset_licenses
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.software_licenses
|
SELECT * FROM ${stats_db_name}.software_licenses
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
|
SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.organization_pids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.organization_pids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET AS
|
||||||
select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
|
select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
|
||||||
from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid;
|
from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.organization_sources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.organization_sources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as
|
||||||
SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource
|
SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource
|
||||||
|
@ -58,10 +58,10 @@ FROM (
|
||||||
LEFT OUTER JOIN (
|
LEFT OUTER JOIN (
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
-- result_accessroute: distinct (result id, open-access route) pairs.
-- The table is dropped and rebuilt from ${openaire_db_name}.result; explode() turns the collection
-- instance.accessright.openaccessroute into one row per element, and DISTINCT removes repeated pairs.
-- NOTE(review): presumably `instance` is an array of structs, so the exploded collection holds one route
-- per instance - confirm against the result schema.
-- substr(id,4) keeps the id from its 4th character onward.
DROP TABLE IF EXISTS ${stats_db_name}.result_accessroute purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_accessroute purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as
|
||||||
select distinct substr(id,4) as id, accessroute from ${openaire_db_name}.result
|
select distinct substr(id,4) as id, accessroute from ${openaire_db_name}.result
|
||||||
lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute;
|
lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute; /*EOS*/
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge; /*EOS*/
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
|
||||||
with peer_reviewed as (
|
with peer_reviewed as (
|
||||||
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
|
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||||
|
@ -22,9 +22,9 @@ from (
|
||||||
union all
|
union all
|
||||||
select non_peer_reviewed.* from non_peer_reviewed
|
select non_peer_reviewed.* from non_peer_reviewed
|
||||||
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
||||||
where peer_reviewed.id is null) pr;
|
where peer_reviewed.id is null) pr; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge; /*EOS*/
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
|
||||||
with peer_reviewed as (
|
with peer_reviewed as (
|
||||||
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
|
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||||
|
@ -40,9 +40,9 @@ from (
|
||||||
union all
|
union all
|
||||||
select non_peer_reviewed.* from non_peer_reviewed
|
select non_peer_reviewed.* from non_peer_reviewed
|
||||||
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
||||||
where peer_reviewed.id is null) pr;
|
where peer_reviewed.id is null) pr; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge; /*EOS*/
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
|
||||||
with peer_reviewed as (
|
with peer_reviewed as (
|
||||||
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
|
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||||
|
@ -58,9 +58,9 @@ from (
|
||||||
union all
|
union all
|
||||||
select non_peer_reviewed.* from non_peer_reviewed
|
select non_peer_reviewed.* from non_peer_reviewed
|
||||||
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
||||||
where peer_reviewed.id is null) pr;
|
where peer_reviewed.id is null) pr; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge; /*EOS*/
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
|
||||||
with peer_reviewed as (
|
with peer_reviewed as (
|
||||||
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
|
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||||
|
@ -76,7 +76,7 @@ from (
|
||||||
union all
|
union all
|
||||||
select non_peer_reviewed.* from non_peer_reviewed
|
select non_peer_reviewed.* from non_peer_reviewed
|
||||||
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
|
||||||
where peer_reviewed.id is null) pr;
|
where peer_reviewed.id is null) pr; /*EOS*/
|
||||||
|
|
||||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
|
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
|
||||||
select * from ${stats_db_name}.publication_refereed
|
select * from ${stats_db_name}.publication_refereed
|
||||||
|
@ -85,17 +85,17 @@ select * from ${stats_db_name}.dataset_refereed
|
||||||
union all
|
union all
|
||||||
select * from ${stats_db_name}.software_refereed
|
select * from ${stats_db_name}.software_refereed
|
||||||
union all
|
union all
|
||||||
select * from ${stats_db_name}.otherresearchproduct_refereed;
|
select * from ${stats_db_name}.otherresearchproduct_refereed; /*EOS*/
|
||||||
|
|
||||||
-- indi_impact_measures: one row per (result, measure), excluding the measures whose id is
-- 'views' or 'downloads'.
-- Columns: id (result id from its 4th character onward), impactmetric (the measure id),
-- score / score_dec (element [0] of unit.value cast to double and to decimal(6,3) respectively),
-- impact_class (element [1] of unit.value, uncast).
-- NOTE(review): decimal(6,3) leaves room for only three integer digits; confirm that no score can
-- reach 1000, otherwise score_dec will not be able to represent it.
DROP TABLE IF EXISTS ${stats_db_name}.indi_impact_measures purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.indi_impact_measures purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.indi_impact_measures STORED AS PARQUET as
|
create table if not exists ${stats_db_name}.indi_impact_measures STORED AS PARQUET as
|
||||||
select substr(id, 4) as id, measures_ids.id impactmetric, cast(measures_ids.unit.value[0] as double) score,
|
select substr(id, 4) as id, measures_ids.id impactmetric, cast(measures_ids.unit.value[0] as double) score,
|
||||||
cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] impact_class
|
cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] impact_class
|
||||||
from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids
|
from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids
|
||||||
where measures_ids.id!='views' and measures_ids.id!='downloads';
|
where measures_ids.id!='views' and measures_ids.id!='downloads'; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_apc_affiliations purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_apc_affiliations purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.result_apc_affiliations STORED AS PARQUET as
|
create table if not exists ${stats_db_name}.result_apc_affiliations STORED AS PARQUET as
|
||||||
select distinct substr(rel.target,4) id, substr(rel.source,4) organization, o.legalname.value name,
|
select distinct substr(rel.target,4) id, substr(rel.source,4) organization, o.legalname.value name,
|
||||||
|
@ -104,4 +104,4 @@ rel.properties[1].value apc_currency
|
||||||
from ${openaire_db_name}.relation rel
|
from ${openaire_db_name}.relation rel
|
||||||
join ${openaire_db_name}.organization o on o.id=rel.source
|
join ${openaire_db_name}.organization o on o.id=rel.source
|
||||||
join ${openaire_db_name}.result r on r.id=rel.target
|
join ${openaire_db_name}.result r on r.id=rel.target
|
||||||
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;
|
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0; /*EOS*/
|
|
@ -1,25 +1,25 @@
|
||||||
-------------------------------------------
|
-------------------------------------------
|
||||||
--- Extra tables, mostly used by indicators
|
--- Extra tables, mostly used by indicators
|
||||||
|
|
||||||
-- result_projectcount: number of distinct projects linked to each result.
-- Both joins are LEFT OUTER, so every row of ${stats_db_name}.result is kept; a result with no
-- matching project gets count = 0 because count(distinct p.id) ignores NULLs.
-- A result_projects row whose project id is missing from ${stats_db_name}.project is not counted.
DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
|
create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
|
||||||
select r.id, count(distinct p.id) as count
|
select r.id, count(distinct p.id) as count
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||||
group by r.id;
|
group by r.id; /*EOS*/
|
||||||
|
|
||||||
-- result_fundercount: number of distinct funders per result, reached through the result's projects
-- (result -> result_projects -> project.funder).
-- Both joins are LEFT OUTER, so every row of ${stats_db_name}.result is kept; a result with no
-- project, or whose projects all have a NULL funder, gets count = 0 (count(distinct ...) ignores NULLs).
DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
|
create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
|
||||||
select r.id, count(distinct p.funder) as count
|
select r.id, count(distinct p.funder) as count
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||||
group by r.id; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.project_resultcount purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.project_resultcount purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
|
create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
|
||||||
with rcount as (
|
with rcount as (
|
||||||
|
@ -33,17 +33,17 @@ select rcount.pid, sum(case when rcount.type='publication' then rcount.count els
|
||||||
sum(case when rcount.type='software' then rcount.count else 0 end) as software,
|
sum(case when rcount.type='software' then rcount.count else 0 end) as software,
|
||||||
sum(case when rcount.type='other' then rcount.count else 0 end) as other
|
sum(case when rcount.type='other' then rcount.count else 0 end) as other
|
||||||
from rcount
|
from rcount
|
||||||
group by rcount.pid;
|
group by rcount.pid; /*EOS*/
|
||||||
|
|
||||||
-- Pass-through views: each one exposes a table of the stats_ext database under ${stats_db_name}
-- with "select *", so the view's columns are whatever the source table has when the view is created.
-- NOTE(review): the source database name is hard-coded as stats_ext here, while the issn_gold_oa_dataset
-- view elsewhere in this diff reads from ${external_stats_db_name}; confirm whether these should use the
-- same parameter.
-- NOTE(review): the first view is named rndexpenditure but selects from stats_ext.rndexpediture
-- (different spelling); presumably that is the source table's actual name - verify before changing it.
create or replace view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
|
create or replace view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture; /*EOS*/
|
||||||
create or replace view ${stats_db_name}.rndgdpexpenditure as select * from stats_ext.rndgdpexpenditure;
|
create or replace view ${stats_db_name}.rndgdpexpenditure as select * from stats_ext.rndgdpexpenditure; /*EOS*/
|
||||||
create or replace view ${stats_db_name}.doctoratestudents as select * from stats_ext.doctoratestudents;
|
create or replace view ${stats_db_name}.doctoratestudents as select * from stats_ext.doctoratestudents; /*EOS*/
|
||||||
create or replace view ${stats_db_name}.totalresearchers as select * from stats_ext.totalresearchers;
|
create or replace view ${stats_db_name}.totalresearchers as select * from stats_ext.totalresearchers; /*EOS*/
|
||||||
create or replace view ${stats_db_name}.totalresearchersft as select * from stats_ext.totalresearchersft;
|
create or replace view ${stats_db_name}.totalresearchersft as select * from stats_ext.totalresearchersft; /*EOS*/
|
||||||
create or replace view ${stats_db_name}.hrrst as select * from stats_ext.hrrst;
|
create or replace view ${stats_db_name}.hrrst as select * from stats_ext.hrrst; /*EOS*/
|
||||||
create or replace view ${stats_db_name}.graduatedoctorates as select * from stats_ext.graduatedoctorates;
|
create or replace view ${stats_db_name}.graduatedoctorates as select * from stats_ext.graduatedoctorates; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_instance purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_instance purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.result_instance stored as parquet as
|
create table if not exists ${stats_db_name}.result_instance stored as parquet as
|
||||||
select distinct r.*
|
select distinct r.*
|
||||||
|
@ -51,9 +51,9 @@ from (
|
||||||
select substr(r.id, 4) as id, inst.accessright.classname as accessright, inst.accessright.openaccessroute as accessright_uw, substr(inst.collectedfrom.key, 4) as collectedfrom,
|
select substr(r.id, 4) as id, inst.accessright.classname as accessright, inst.accessright.openaccessroute as accessright_uw, substr(inst.collectedfrom.key, 4) as collectedfrom,
|
||||||
substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid
|
substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid
|
||||||
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view outer explode(inst.pid) pids as p) r
|
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view outer explode(inst.pid) pids as p) r
|
||||||
join ${stats_db_name}.result res on res.id=r.id;
|
join ${stats_db_name}.result res on res.id=r.id; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_apc purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_apc purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.result_apc STORED AS PARQUET as
|
create table if not exists ${stats_db_name}.result_apc STORED AS PARQUET as
|
||||||
select distinct r.id, r.amount, r.currency
|
select distinct r.id, r.amount, r.currency
|
||||||
|
@ -61,6 +61,6 @@ from (
|
||||||
select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
|
select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
|
||||||
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
|
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
|
||||||
join ${stats_db_name}.result res on res.id=r.id
|
join ${stats_db_name}.result res on res.id=r.id
|
||||||
where r.amount is not null;
|
where r.amount is not null; /*EOS*/
|
||||||
|
|
||||||
-- Pass-through view: exposes ${external_stats_db_name}.issn_gold_oa_dataset under ${stats_db_name}.
-- "create or replace" makes the statement safe to re-run; "select *" copies whatever columns the
-- source table has at creation time.
create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset;
|
create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset; /*EOS*/
|
|
@ -3,26 +3,26 @@
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
|
|
||||||
-- Peer reviewed:
|
-- Peer reviewed:
|
||||||
-- result_peerreviewed: one boolean per row of ${stats_db_name}.result.
-- peer_reviewed is true only when the result has doi_from_crossref = 1 AND grey_lit = 0.
-- Because both indicator tables are LEFT OUTER joined, a result missing from either of them yields a
-- NULL comparison, which falls through to the ELSE branch and is stored as false (never NULL).
-- NOTE(review): if an indicator table holds more than one row per id, the result id is repeated in the
-- output - confirm ids are unique there.
drop table if exists ${stats_db_name}.result_peerreviewed purge;
|
drop table if exists ${stats_db_name}.result_peerreviewed purge; /*EOS*/
|
||||||
|
|
||||||
create table IF NOT EXISTS ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
|
create table IF NOT EXISTS ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
|
||||||
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
||||||
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id; /*EOS*/
|
||||||
|
|
||||||
-- Green OA:
|
-- Green OA:
|
||||||
-- result_greenoa: one boolean per row of ${stats_db_name}.result.
-- green is true when the matching indi_pub_green_oa row has green_oa = 1; results with no matching row
-- (LEFT OUTER join, so green_oa is NULL) fall through to ELSE and are stored as false.
drop table if exists ${stats_db_name}.result_greenoa purge;
|
drop table if exists ${stats_db_name}.result_greenoa purge; /*EOS*/
|
||||||
|
|
||||||
create table IF NOT EXISTS ${stats_db_name}.result_greenoa STORED AS PARQUET as
|
create table IF NOT EXISTS ${stats_db_name}.result_greenoa STORED AS PARQUET as
|
||||||
select r.id, case when green.green_oa=1 then true else false end as green
|
select r.id, case when green.green_oa=1 then true else false end as green
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; /*EOS*/
|
||||||
|
|
||||||
-- GOLD OA:
|
-- GOLD OA:
|
||||||
-- result_gold: one boolean per row of ${stats_db_name}.result.
-- gold is true when the matching indi_pub_gold_oa row has is_gold = 1; results with no matching row
-- (LEFT OUTER join, so is_gold is NULL) fall through to ELSE and are stored as false.
drop table if exists ${stats_db_name}.result_gold purge;
|
drop table if exists ${stats_db_name}.result_gold purge; /*EOS*/
|
||||||
|
|
||||||
create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as
|
create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as
|
||||||
select r.id, case when gold.is_gold=1 then true else false end as gold
|
select r.id, case when gold.is_gold=1 then true else false end as gold
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; /*EOS*/
|
|
@ -1,6 +1,6 @@
|
||||||
-- replace the creation of the result view to include the boolean fields from the previous tables (green, gold,
|
-- replace the creation of the result view to include the boolean fields from the previous tables (green, gold,
|
||||||
-- peer reviewed)
|
-- peer reviewed)
|
||||||
drop table if exists ${stats_db_name}.result_tmp;
|
drop table if exists ${stats_db_name}.result_tmp; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.result_tmp (
|
CREATE TABLE ${stats_db_name}.result_tmp (
|
||||||
id STRING,
|
id STRING,
|
||||||
|
@ -20,37 +20,37 @@ CREATE TABLE ${stats_db_name}.result_tmp (
|
||||||
peer_reviewed BOOLEAN,
|
peer_reviewed BOOLEAN,
|
||||||
green BOOLEAN,
|
green BOOLEAN,
|
||||||
gold BOOLEAN)
|
gold BOOLEAN)
|
||||||
clustered by (id) into 100 buckets stored as orc tblproperties('transactional'='true');
|
clustered by (id) into 100 buckets stored as orc tblproperties('transactional'='true'); /*EOS*/
|
||||||
|
|
||||||
-- Appends every row of ${stats_db_name}.publication to result_tmp, enriched with the peer_reviewed,
-- green and gold flags. The three flag tables are LEFT OUTER joined, so a publication without a flag
-- row is still inserted, with NULL in that flag column.
-- The insert has no column list: values are matched to result_tmp columns by position, so the select
-- order must follow the table definition exactly.
-- date_format(r.`date`, 'yyyy') derives the four-digit year from the date column.
-- NOTE(review): r.bestlicence is selected twice in a row, presumably to fill two adjacent columns of
-- result_tmp with the same value - confirm against the full CREATE TABLE.
insert into ${stats_db_name}.result_tmp
|
insert into ${stats_db_name}.result_tmp
|
||||||
select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
|
select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
|
||||||
FROM ${stats_db_name}.publication r
|
FROM ${stats_db_name}.publication r
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
|
LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
|
LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
|
LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; /*EOS*/
|
||||||
|
|
||||||
-- Appends every row of ${stats_db_name}.dataset to result_tmp, enriched with the peer_reviewed,
-- green and gold flags. The three flag tables are LEFT OUTER joined, so a dataset without a flag
-- row is still inserted, with NULL in that flag column.
-- The insert has no column list: values are matched to result_tmp columns by position, so the select
-- order must follow the table definition exactly.
-- NOTE(review): r.bestlicence is selected twice in a row, presumably to fill two adjacent columns of
-- result_tmp with the same value - confirm against the full CREATE TABLE.
insert into ${stats_db_name}.result_tmp
|
insert into ${stats_db_name}.result_tmp
|
||||||
select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
|
select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
|
||||||
FROM ${stats_db_name}.dataset r
|
FROM ${stats_db_name}.dataset r
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
|
LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
|
LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
|
LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; /*EOS*/
|
||||||
|
|
||||||
-- Appends every row of ${stats_db_name}.software to result_tmp, enriched with the peer_reviewed,
-- green and gold flags. The three flag tables are LEFT OUTER joined, so a software row without a flag
-- row is still inserted, with NULL in that flag column.
-- The insert has no column list: values are matched to result_tmp columns by position, so the select
-- order must follow the table definition exactly.
-- NOTE(review): r.bestlicence is selected twice in a row, presumably to fill two adjacent columns of
-- result_tmp with the same value - confirm against the full CREATE TABLE.
insert into ${stats_db_name}.result_tmp
|
insert into ${stats_db_name}.result_tmp
|
||||||
select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
|
select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
|
||||||
FROM ${stats_db_name}.software r
|
FROM ${stats_db_name}.software r
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
|
LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
|
LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
|
LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; /*EOS*/
|
||||||
|
|
||||||
-- Appends every row of ${stats_db_name}.otherresearchproduct to result_tmp, enriched with the
-- peer_reviewed, green and gold flags. The three flag tables are LEFT OUTER joined, so a row without a
-- flag row is still inserted, with NULL in that flag column.
-- The insert has no column list: values are matched to result_tmp columns by position, so the select
-- order must follow the table definition exactly.
-- NOTE(review): r.bestlicence is selected twice in a row, presumably to fill two adjacent columns of
-- result_tmp with the same value - confirm against the full CREATE TABLE.
insert into ${stats_db_name}.result_tmp
|
insert into ${stats_db_name}.result_tmp
|
||||||
select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
|
select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold
|
||||||
FROM ${stats_db_name}.otherresearchproduct r
|
FROM ${stats_db_name}.otherresearchproduct r
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
|
LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
|
LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id
|
||||||
LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
|
LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; /*EOS*/
|
||||||
|
|
||||||
-- Replaces ${stats_db_name}.result with a Parquet copy of result_tmp, then removes result_tmp.
-- Both "drop table" and "drop view" are issued for the name `result`, so the old object is removed
-- whichever of the two kinds it is.
-- NOTE(review): the swap is not atomic - between the drops and the create there is no `result` object,
-- and the final drop has no IF EXISTS, so it fails if result_tmp is missing.
drop table if exists ${stats_db_name}.result;
|
drop table if exists ${stats_db_name}.result; /*EOS*/
|
||||||
drop view if exists ${stats_db_name}.result;
|
drop view if exists ${stats_db_name}.result; /*EOS*/
|
||||||
create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp;
|
create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp; /*EOS*/
|
||||||
drop table ${stats_db_name}.result_tmp;
|
drop table ${stats_db_name}.result_tmp; /*EOS*/
|
|
@ -5,7 +5,7 @@
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- Publication temporary table
|
-- Publication temporary table
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_tmp purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_tmp purge; /*EOS*/
|
||||||
CREATE TABLE ${stats_db_name}.publication_tmp
|
CREATE TABLE ${stats_db_name}.publication_tmp
|
||||||
(
|
(
|
||||||
id STRING,
|
id STRING,
|
||||||
|
@ -22,7 +22,7 @@ CREATE TABLE ${stats_db_name}.publication_tmp
|
||||||
abstract BOOLEAN,
|
abstract BOOLEAN,
|
||||||
type STRING
|
type STRING
|
||||||
)
|
)
|
||||||
clustered by (id) into 100 buckets stored as orc tblproperties ('transactional' = 'true');
|
clustered by (id) into 100 buckets stored as orc tblproperties ('transactional' = 'true'); /*EOS*/
|
||||||
|
|
||||||
INSERT INTO ${stats_db_name}.publication_tmp
|
INSERT INTO ${stats_db_name}.publication_tmp
|
||||||
SELECT substr(p.id, 4) as id,
|
SELECT substr(p.id, 4) as id,
|
||||||
|
@ -39,17 +39,17 @@ SELECT substr(p.id, 4) as id,
|
||||||
case when size(p.description) > 0 then true else false end as abstract,
|
case when size(p.description) > 0 then true else false end as abstract,
|
||||||
'publication' as type
|
'publication' as type
|
||||||
from ${openaire_db_name}.publication p
|
from ${openaire_db_name}.publication p
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- publication_classifications: one row per (publication, instance type class name).
-- Only publications with deletedbyinference = false and invisible = false are included.
-- explode(p.instance.instancetype) emits one row per element of that collection; the lateral view is
-- not OUTER, so publications with no instances produce no rows.
-- The CREATE has no IF NOT EXISTS and relies on the preceding DROP to be re-runnable.
DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, instancetype.classname as type
|
SELECT substr(p.id, 4) as id, instancetype.classname as type
|
||||||
from ${openaire_db_name}.publication p
|
from ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
|
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, case
|
SELECT substr(p.id, 4) as id, case
|
||||||
|
@ -58,9 +58,9 @@ SELECT substr(p.id, 4) as id, case
|
||||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||||
from ${openaire_db_name}.publication p
|
from ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.context) contexts as context
|
LATERAL VIEW explode(p.context) contexts as context
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
|
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
|
@ -71,44 +71,44 @@ FROM (
|
||||||
LEFT OUTER JOIN (
|
LEFT OUTER JOIN (
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
-- publication_languages: one row per publication with the class name of its language.
-- Only publications with deletedbyinference = false and invisible = false are included.
-- substr(p.id, 4) keeps the id from its 4th character onward.
-- The CREATE has no IF NOT EXISTS and relies on the preceding DROP to be re-runnable.
DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
|
||||||
select substr(p.id, 4) as id, p.language.classname as language
|
select substr(p.id, 4) as id, p.language.classname as language
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- publication_oids: one row per (publication, original identifier).
-- explode(p.originalid) emits one row per element of the originalid array; publications with an empty
-- or NULL array produce no rows because the lateral view is not OUTER.
-- Only publications with deletedbyinference = false and invisible = false are included.
DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- publication_pids: one row per (publication, persistent identifier), with the identifier's
-- qualifier class name as `type` and its value as `pid`.
-- explode(p.pid) emits one row per element of the pid array; publications with an empty or NULL array
-- produce no rows because the lateral view is not OUTER.
-- Only publications with deletedbyinference = false and invisible = false are included.
DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
|
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- publication_topics: one row per (publication, subject), with the subject's qualifier class name as
-- `TYPE` and its value as `topic`.
-- explode(p.subject) emits one row per element of the subject array; publications with an empty or
-- NULL array produce no rows because the lateral view is not OUTER.
-- Only publications with deletedbyinference = false and invisible = false are included.
DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
|
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
|
||||||
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
|
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- publication_citations: one row per (publication, cited id).
-- Each element of p.extrainfo is exploded and its `value` is queried with XPath: the `value` attribute
-- of the citation's <id type='openaire'> element becomes `cites`.
-- Rows where that XPath yields an empty string are filtered out, so only extrainfo entries carrying an
-- openaire-typed id contribute. The same xpath_string call appears in both SELECT and WHERE.
-- Only publications with deletedbyinference = false and invisible = false are included.
-- NOTE(review): presumably citation.value holds well-formed XML - confirm how malformed values behave.
DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
lateral view explode(p.extrainfo) citations AS citation
|
lateral view explode(p.extrainfo) citations AS citation
|
||||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||||
and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
|
@ -5,7 +5,7 @@ from ${stats_db_name}.result r
|
||||||
select rl.id, sum(case when rl.type like 'CC%' then 1 else 0 end) as count
|
select rl.id, sum(case when rl.type like 'CC%' then 1 else 0 end) as count
|
||||||
from ${stats_db_name}.result_licenses rl
|
from ${stats_db_name}.result_licenses rl
|
||||||
group by rl.id
|
group by rl.id
|
||||||
) rln on rln.id=r.id;
|
) rln on rln.id=r.id; /*EOS*/
|
||||||
|
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_country stored as parquet as
|
||||||
|
@ -35,7 +35,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_year stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_year stored as parquet as
|
||||||
|
@ -65,7 +65,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; /*EOS*/
|
||||||
|
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as
|
||||||
|
@ -95,7 +95,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as
|
||||||
|
@ -127,7 +127,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -158,7 +158,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_organization stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_organization stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -187,7 +187,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -216,7 +216,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_funder stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_funder stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -247,7 +247,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as
|
create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -278,7 +278,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_country stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -309,7 +309,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_year stored as parquet as
|
create table ${observatory_db_name}.result_deposited_year stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -340,7 +340,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; /*EOS*/
|
||||||
|
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_year_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_year_country stored as parquet as
|
||||||
|
@ -372,7 +372,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_datasource stored as parquet as
|
create table ${observatory_db_name}.result_deposited_datasource stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -403,7 +403,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -434,7 +434,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_organization stored as parquet as
|
create table ${observatory_db_name}.result_deposited_organization stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -465,7 +465,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -496,7 +496,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_funder stored as parquet as
|
create table ${observatory_db_name}.result_deposited_funder stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -529,7 +529,7 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; /*EOS*/
|
||||||
|
|
||||||
create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as
|
create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as
|
||||||
select
|
select
|
||||||
|
@ -562,4 +562,4 @@ from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; /*EOS*/
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
|
|
||||||
-- Dataset temporary table supporting updates
|
-- Dataset temporary table supporting updates
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_tmp purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_tmp purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_tmp
|
CREATE TABLE ${stats_db_name}.dataset_tmp
|
||||||
(
|
(
|
||||||
|
@ -23,7 +23,7 @@ CREATE TABLE ${stats_db_name}.dataset_tmp
|
||||||
abstract BOOLEAN,
|
abstract BOOLEAN,
|
||||||
type STRING
|
type STRING
|
||||||
)
|
)
|
||||||
clustered by (id) into 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
clustered by (id) into 100 buckets stored AS orc tblproperties ('transactional' = 'true'); /*EOS*/
|
||||||
|
|
||||||
INSERT INTO ${stats_db_name}.dataset_tmp
|
INSERT INTO ${stats_db_name}.dataset_tmp
|
||||||
SELECT substr(d.id, 4) AS id,
|
SELECT substr(d.id, 4) AS id,
|
||||||
|
@ -40,26 +40,26 @@ SELECT substr(d.id, 4) AS id,
|
||||||
CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract,
|
CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract,
|
||||||
'dataset' AS type
|
'dataset' AS type
|
||||||
FROM ${openaire_db_name}.dataset d
|
FROM ${openaire_db_name}.dataset d
|
||||||
WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false;
|
WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
|
||||||
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||||
FROM ${openaire_db_name}.dataset d
|
FROM ${openaire_db_name}.dataset d
|
||||||
LATERAL VIEW explode(d.extrainfo) citations AS citation
|
LATERAL VIEW explode(d.extrainfo) citations AS citation
|
||||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||||
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
|
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||||
FROM ${openaire_db_name}.dataset p
|
FROM ${openaire_db_name}.dataset p
|
||||||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, case
|
SELECT substr(p.id, 4) as id, case
|
||||||
|
@ -68,9 +68,9 @@ SELECT substr(p.id, 4) as id, case
|
||||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||||
from ${openaire_db_name}.dataset p
|
from ${openaire_db_name}.dataset p
|
||||||
LATERAL VIEW explode(p.context) contexts as context
|
LATERAL VIEW explode(p.context) contexts as context
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
|
||||||
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||||
|
@ -82,35 +82,35 @@ FROM (
|
||||||
LEFT OUTER JOIN (
|
LEFT OUTER JOIN (
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
FROM ${openaire_db_name}.datasource d
|
FROM ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
||||||
FROM ${openaire_db_name}.dataset p
|
FROM ${openaire_db_name}.dataset p
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||||
FROM ${openaire_db_name}.dataset p
|
FROM ${openaire_db_name}.dataset p
|
||||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||||
FROM ${openaire_db_name}.dataset p
|
FROM ${openaire_db_name}.dataset p
|
||||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||||
FROM ${openaire_db_name}.dataset p
|
FROM ${openaire_db_name}.dataset p
|
||||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
|
@ -5,7 +5,7 @@
|
||||||
--------------------------------------------------------
|
--------------------------------------------------------
|
||||||
|
|
||||||
-- Software temporary table supporting updates
|
-- Software temporary table supporting updates
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.software_tmp purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_tmp purge; /*EOS*/
|
||||||
CREATE TABLE ${stats_db_name}.software_tmp
|
CREATE TABLE ${stats_db_name}.software_tmp
|
||||||
(
|
(
|
||||||
id STRING,
|
id STRING,
|
||||||
|
@ -22,7 +22,7 @@ CREATE TABLE ${stats_db_name}.software_tmp
|
||||||
abstract BOOLEAN,
|
abstract BOOLEAN,
|
||||||
type STRING
|
type STRING
|
||||||
)
|
)
|
||||||
clustered by (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
clustered by (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); /*EOS*/
|
||||||
|
|
||||||
INSERT INTO ${stats_db_name}.software_tmp
|
INSERT INTO ${stats_db_name}.software_tmp
|
||||||
SELECT substr(s.id, 4) as id,
|
SELECT substr(s.id, 4) as id,
|
||||||
|
@ -39,24 +39,24 @@ SELECT substr(s.id, 4) as id,
|
||||||
CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
|
CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
|
||||||
'software' as type
|
'software' as type
|
||||||
from ${openaire_db_name}.software s
|
from ${openaire_db_name}.software s
|
||||||
where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
|
where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- software_citations: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- One row per element of s.extrainfo whose value yields a non-empty result for the XPath
-- //citation/id[@type='openaire']/@value; that result becomes the `cites` column.
-- substr(s.id, 4) keeps the id from its 4th character on, i.e. drops a 3-character prefix
-- (presumably the entity-type prefix such as '50|' -- confirm).
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
|
||||||
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||||
FROM ${openaire_db_name}.software s
|
FROM ${openaire_db_name}.software s
|
||||||
LATERAL VIEW explode(s.extrainfo) citations as citation
|
LATERAL VIEW explode(s.extrainfo) citations as citation
|
||||||
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||||
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
|
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- software_classifications: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Explodes p.instance.instancetype and emits one (id, type) row per element, where `type`
-- is the element's classname. substr(p.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||||
FROM ${openaire_db_name}.software p
|
FROM ${openaire_db_name}.software p
|
||||||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, case
|
SELECT substr(p.id, 4) as id, case
|
||||||
|
@ -65,9 +65,9 @@ SELECT substr(p.id, 4) as id, case
|
||||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||||
FROM ${openaire_db_name}.software p
|
FROM ${openaire_db_name}.software p
|
||||||
LATERAL VIEW explode(p.context) contexts AS context
|
LATERAL VIEW explode(p.context) contexts AS context
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
|
||||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
||||||
|
@ -79,35 +79,35 @@ FROM (
|
||||||
LEFT OUTER JOIN (
|
LEFT OUTER JOIN (
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
FROM ${openaire_db_name}.datasource d
|
FROM ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
-- software_languages: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- One row per retained software record: the id with its first three characters removed
-- (substr(p.id, 4)) and p.language.classname as `language`. No explode is involved.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
|
||||||
select substr(p.id, 4) AS id, p.language.classname AS language
|
select substr(p.id, 4) AS id, p.language.classname AS language
|
||||||
FROM ${openaire_db_name}.software p
|
FROM ${openaire_db_name}.software p
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- software_oids: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Explodes p.originalid and emits one (id, oid) row per element.
-- substr(p.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||||
FROM ${openaire_db_name}.software p
|
FROM ${openaire_db_name}.software p
|
||||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- software_pids: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Explodes p.pid and emits one (id, type, pid) row per element: `type` is the element's
-- qualifier.classname and `pid` is its value.
-- substr(p.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||||
FROM ${openaire_db_name}.software p
|
FROM ${openaire_db_name}.software p
|
||||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- software_topics: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Explodes p.subject and emits one (id, type, topic) row per element: `type` is the
-- subject's qualifier.classname and `topic` is its value.
-- substr(p.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||||
FROM ${openaire_db_name}.software p
|
FROM ${openaire_db_name}.software p
|
||||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
|
@ -5,7 +5,7 @@
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
-- Otherresearchproduct temporary table supporting updates
|
-- Otherresearchproduct temporary table supporting updates
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_tmp purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_tmp purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp
|
||||||
(
|
(
|
||||||
|
@ -22,7 +22,7 @@ CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp
|
||||||
source STRING,
|
source STRING,
|
||||||
abstract BOOLEAN,
|
abstract BOOLEAN,
|
||||||
type STRING
|
type STRING
|
||||||
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); /*EOS*/
|
||||||
|
|
||||||
INSERT INTO ${stats_db_name}.otherresearchproduct_tmp
|
INSERT INTO ${stats_db_name}.otherresearchproduct_tmp
|
||||||
SELECT substr(o.id, 4) AS id,
|
SELECT substr(o.id, 4) AS id,
|
||||||
|
@ -39,23 +39,23 @@ SELECT substr(o.id, 4) AS id,
|
||||||
CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
|
CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract,
|
||||||
'other' AS type
|
'other' AS type
|
||||||
FROM ${openaire_db_name}.otherresearchproduct o
|
FROM ${openaire_db_name}.otherresearchproduct o
|
||||||
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false;
|
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- Otherresearchproduct_citations
|
-- Otherresearchproduct_citations
|
||||||
-- Rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- One row per element of o.extrainfo whose value yields a non-empty result for the XPath
-- //citation/id[@type='openaire']/@value; that result becomes the `cites` column.
-- substr(o.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
|
||||||
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||||
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
|
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
|
||||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||||
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false;
|
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- otherresearchproduct_classifications: create-as-select, stored as Parquet.
-- Explodes p.instance.instancetype and emits one (id, type) row per element, where `type`
-- is the element's classname. substr(p.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
-- NOTE(review): unlike the neighbouring tables, no "DROP TABLE IF EXISTS ... purge" precedes
-- this CREATE, so it fails if the table already exists -- confirm the database is always
-- recreated before this step runs, or add the DROP.
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, case
|
SELECT substr(p.id, 4) as id, case
|
||||||
|
@ -63,9 +63,9 @@ SELECT substr(p.id, 4) as id, case
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||||
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
when contexts.context.id RLIKE '^[^::]+$' then concat(contexts.context.id, '::other::other') END as concept
|
||||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
|
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
|
||||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||||
|
@ -74,32 +74,32 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
|
||||||
LEFT OUTER JOIN(SELECT substr(d.id, 4) id
|
LEFT OUTER JOIN(SELECT substr(d.id, 4) id
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/
|
||||||
|
|
||||||
-- otherresearchproduct_languages: rebuilt from scratch (drop, then create-as-select, Parquet).
-- One row per retained otherresearchproduct record: the id with its first three characters
-- removed (substr(p.id, 4)) and p.language.classname as `language`. No explode is involved.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
||||||
FROM ${openaire_db_name}.otherresearchproduct p
|
FROM ${openaire_db_name}.otherresearchproduct p
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- otherresearchproduct_oids: rebuilt from scratch (drop, then create-as-select, Parquet).
-- Explodes p.originalid and emits one (id, oid) row per element.
-- substr(p.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
|
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- otherresearchproduct_pids: rebuilt from scratch (drop, then create-as-select, Parquet).
-- Explodes p.pid and emits one (id, type, pid) row per element: `type` is the element's
-- qualifier.classname and `pid` is its value.
-- substr(p.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
|
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- otherresearchproduct_topics: rebuilt from scratch (drop, then create-as-select, Parquet).
-- Explodes p.subject and emits one (id, type, topic) row per element: `type` is the
-- subject's qualifier.classname and `topic` is its value.
-- substr(p.id, 4) drops the first three characters of the id.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
|
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
|
||||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
|
@ -3,38 +3,38 @@
|
||||||
-- Project table/view and Project related tables/views
|
-- Project table/view and Project related tables/views
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
-- project_oids: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Explodes p.originalid and emits one (id, oid) row per element.
-- substr(p.id, 4) drops the first three characters of the project id.
-- Projects flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.project_oids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.project_oids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||||
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
|
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false;
|
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- project_organizations: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Built from relation rows with reltype 'projectOrganization' whose source starts with '40|'.
-- `id` is the relation source and `organization` is the relation target, each with its first
-- three characters removed by substr(..., 4) (for the source, that is the '40|' prefix
-- required by the LIKE filter).
-- Relations flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
|
||||||
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
||||||
from ${openaire_db_name}.relation r
|
from ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
|
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
|
||||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- project_results: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Built from relation rows with reltype 'resultProject' whose target starts with '40|'.
-- Note the direction: `id` is the relation TARGET and `result` is the relation SOURCE,
-- each with its first three characters removed by substr(..., 4).
-- `provenance` is r.datainfo.provenanceaction.classname.
-- Relations flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
|
||||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||||
FROM ${openaire_db_name}.relation r
|
FROM ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'resultProject' and r.target like '40|%'
|
WHERE r.reltype = 'resultProject' and r.target like '40|%'
|
||||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- project_classification: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Explodes p.h2020classification and emits, per element, the project id (first three
-- characters removed) together with h2020programme.code, level1, level2 and level3.
-- Elements whose h2020programme is NULL are skipped, as are projects flagged
-- deletedbyinference or invisible.
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge; /*EOS*/
|
||||||
|
|
||||||
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
||||||
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
|
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
|
||||||
from ${openaire_db_name}.project p
|
from ${openaire_db_name}.project p
|
||||||
lateral view explode(p.h2020classification) classifs as class
|
lateral view explode(p.h2020classification) classifs as class
|
||||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null;
|
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.project_tmp purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.project_tmp purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.project_tmp
|
CREATE TABLE ${stats_db_name}.project_tmp
|
||||||
(
|
(
|
||||||
|
@ -61,7 +61,7 @@ CREATE TABLE ${stats_db_name}.project_tmp
|
||||||
totalcost FLOAT,
|
totalcost FLOAT,
|
||||||
fundedamount FLOAT,
|
fundedamount FLOAT,
|
||||||
currency STRING
|
currency STRING
|
||||||
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); /*EOS*/
|
||||||
|
|
||||||
INSERT INTO ${stats_db_name}.project_tmp
|
INSERT INTO ${stats_db_name}.project_tmp
|
||||||
SELECT substr(p.id, 4) AS id,
|
SELECT substr(p.id, 4) AS id,
|
||||||
|
@ -88,18 +88,18 @@ SELECT substr(p.id, 4) AS id,
|
||||||
p.fundedamount AS fundedamount,
|
p.fundedamount AS fundedamount,
|
||||||
p.currency.value AS currency
|
p.currency.value AS currency
|
||||||
FROM ${openaire_db_name}.project p
|
FROM ${openaire_db_name}.project p
|
||||||
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- funder: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Explodes p.fundingtree.value and, from each exploded XML string `fund`, extracts
-- //funder/id, //funder/name and //funder/shortname; SELECT DISTINCT collapses duplicates.
-- NOTE(review): `country` is read from fundingtree[0].value rather than from the exploded
-- `fund`. How `fundingtree` resolves here (lateral-view alias vs. the p.fundingtree column)
-- should be confirmed; if it is the column, country always comes from the project's first
-- funding tree regardless of which `fund` the row describes.
-- NOTE(review): unlike the other project tables there is no deletedbyinference/invisible
-- filter on p -- confirm this is intentional.
DROP TABLE IF EXISTS ${stats_db_name}.funder purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.funder purge; /*EOS*/
|
||||||
|
|
||||||
create table ${stats_db_name}.funder STORED AS PARQUET as
|
create table ${stats_db_name}.funder STORED AS PARQUET as
|
||||||
select distinct xpath_string(fund, '//funder/id') as id,
|
select distinct xpath_string(fund, '//funder/id') as id,
|
||||||
xpath_string(fund, '//funder/name') as name,
|
xpath_string(fund, '//funder/name') as name,
|
||||||
xpath_string(fund, '//funder/shortname') as shortname,
|
xpath_string(fund, '//funder/shortname') as shortname,
|
||||||
xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country
|
xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country
|
||||||
from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund;
|
from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
|
||||||
SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
|
SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
|
||||||
|
@ -107,4 +107,4 @@ properties[0].value contribution, properties[1].value currency
|
||||||
from ${openaire_db_name}.relation r
|
from ${openaire_db_name}.relation r
|
||||||
LATERAL VIEW explode (r.properties) properties
|
LATERAL VIEW explode (r.properties) properties
|
||||||
where properties[0].key='contribution' and r.reltype = 'projectOrganization' and r.source like '40|%'
|
where properties[0].key='contribution' and r.reltype = 'projectOrganization' and r.source like '40|%'
|
||||||
and properties[0].value>0.0 and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
and properties[0].value>0.0 and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
|
@ -16,7 +16,7 @@ SELECT *, bestlicence AS access_mode
|
||||||
FROM ${stats_db_name}.dataset_tmp
|
FROM ${stats_db_name}.dataset_tmp
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *, bestlicence AS access_mode
|
SELECT *, bestlicence AS access_mode
|
||||||
FROM ${stats_db_name}.otherresearchproduct_tmp;
|
FROM ${stats_db_name}.otherresearchproduct_tmp; /*EOS*/
|
||||||
|
|
||||||
-- Views on final tables
|
-- Views on final tables
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.result_datasources AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.result_datasources AS
|
||||||
|
@ -30,7 +30,7 @@ SELECT *
|
||||||
FROM ${stats_db_name}.dataset_datasources
|
FROM ${stats_db_name}.dataset_datasources
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_datasources;
|
FROM ${stats_db_name}.otherresearchproduct_datasources; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.result_citations AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.result_citations AS
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -43,7 +43,7 @@ SELECT *
|
||||||
FROM ${stats_db_name}.dataset_citations
|
FROM ${stats_db_name}.dataset_citations
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_citations;
|
FROM ${stats_db_name}.otherresearchproduct_citations; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.result_classifications AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.result_classifications AS
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -56,7 +56,7 @@ SELECT *
|
||||||
FROM ${stats_db_name}.dataset_classifications
|
FROM ${stats_db_name}.dataset_classifications
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_classifications;
|
FROM ${stats_db_name}.otherresearchproduct_classifications; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.result_concepts AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.result_concepts AS
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -69,7 +69,7 @@ SELECT *
|
||||||
FROM ${stats_db_name}.dataset_concepts
|
FROM ${stats_db_name}.dataset_concepts
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_concepts;
|
FROM ${stats_db_name}.otherresearchproduct_concepts; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.result_languages AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.result_languages AS
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -82,7 +82,7 @@ SELECT *
|
||||||
FROM ${stats_db_name}.dataset_languages
|
FROM ${stats_db_name}.dataset_languages
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_languages;
|
FROM ${stats_db_name}.otherresearchproduct_languages; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.result_oids AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.result_oids AS
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -95,7 +95,7 @@ SELECT *
|
||||||
FROM ${stats_db_name}.dataset_oids
|
FROM ${stats_db_name}.dataset_oids
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_oids;
|
FROM ${stats_db_name}.otherresearchproduct_oids; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.result_pids AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.result_pids AS
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -108,7 +108,7 @@ SELECT *
|
||||||
FROM ${stats_db_name}.dataset_pids
|
FROM ${stats_db_name}.dataset_pids
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_pids;
|
FROM ${stats_db_name}.otherresearchproduct_pids; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.result_topics AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.result_topics AS
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -121,9 +121,9 @@ SELECT *
|
||||||
FROM ${stats_db_name}.dataset_topics
|
FROM ${stats_db_name}.dataset_topics
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_topics;
|
FROM ${stats_db_name}.otherresearchproduct_topics; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_fos purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_fos purge; /*EOS*/
|
||||||
|
|
||||||
create table ${stats_db_name}.result_fos stored as parquet as
|
create table ${stats_db_name}.result_fos stored as parquet as
|
||||||
with
|
with
|
||||||
|
@ -133,22 +133,22 @@ with
|
||||||
select lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
|
select lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
|
||||||
from lvl1
|
from lvl1
|
||||||
join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2)
|
join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2)
|
||||||
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4);
|
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4); /*EOS*/
|
||||||
|
|
||||||
-- result_organization: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Built from relation rows with reltype 'resultOrganization' whose target starts with '50|'.
-- `id` is the relation TARGET and `organization` is the relation SOURCE, each with its first
-- three characters removed by substr(..., 4) (for the target, that is the '50|' prefix
-- required by the LIKE filter).
-- Relations flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
|
||||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||||
FROM ${openaire_db_name}.relation r
|
FROM ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'resultOrganization'
|
WHERE r.reltype = 'resultOrganization'
|
||||||
and r.target like '50|%'
|
and r.target like '50|%'
|
||||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- result_projects: rebuilt from scratch (drop, then create-as-select, stored as Parquet).
-- Inner-joins the stats `result` relation with project_results (on result id) and with
-- project_tmp (on project id), so only results and projects present in all three appear.
-- Output: result id, project id, daysfromend and the link's provenance.
-- NOTE(review): daysfromend is computed as datediff(p.enddate, p.startdate), i.e. the number
-- of days from the project's start to its end, not a distance from the result to the
-- project end -- confirm this matches the column's intended meaning or a later step.
DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
|
||||||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||||
FROM ${stats_db_name}.result r
|
FROM ${stats_db_name}.result r
|
||||||
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
||||||
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; /*EOS*/
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
-- Datasource table/view and Datasource related tables/views
|
-- Datasource table/view and Datasource related tables/views
|
||||||
------------------------------------------------------------
|
------------------------------------------------------------
|
||||||
------------------------------------------------------------
|
------------------------------------------------------------
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.datasource_tmp
|
CREATE TABLE ${stats_db_name}.datasource_tmp
|
||||||
(
|
(
|
||||||
|
@ -22,7 +22,7 @@ CREATE TABLE ${stats_db_name}.datasource_tmp
|
||||||
`compatibility` STRING,
|
`compatibility` STRING,
|
||||||
issn_printed STRING,
|
issn_printed STRING,
|
||||||
issn_online STRING
|
issn_online STRING
|
||||||
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); /*EOS*/
|
||||||
|
|
||||||
-- Insert statement that takes into account the piwik_id of the openAIRE graph
|
-- Insert statement that takes into account the piwik_id of the openAIRE graph
|
||||||
INSERT INTO ${stats_db_name}.datasource_tmp
|
INSERT INTO ${stats_db_name}.datasource_tmp
|
||||||
|
@ -46,14 +46,14 @@ FROM ${openaire_db_name}.datasource d1
|
||||||
LATERAL VIEW EXPLODE(originalid) temp AS originalidd
|
LATERAL VIEW EXPLODE(originalid) temp AS originalidd
|
||||||
WHERE originalidd like "piwik:%") AS d2
|
WHERE originalidd like "piwik:%") AS d2
|
||||||
ON d1.id = d2.id
|
ON d1.id = d2.id
|
||||||
WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false;
|
WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
|
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
|
||||||
-- Creating a temporary dual table that will be removed after the following insert
|
-- Creating a temporary dual table that will be removed after the following insert
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1));
|
CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1)); /*EOS*/
|
||||||
|
|
||||||
INSERT INTO ${stats_db_name}.dual VALUES ('X');
|
INSERT INTO ${stats_db_name}.dual VALUES ('X'); /*EOS*/
|
||||||
|
|
||||||
INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`,
|
INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`,
|
||||||
`piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`, `issn_printed`, `issn_online`)
|
`piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`, `issn_printed`, `issn_online`)
|
||||||
|
@ -71,42 +71,42 @@ SELECT 'other',
|
||||||
null,
|
null,
|
||||||
null
|
null
|
||||||
FROM ${stats_db_name}.dual
|
FROM ${stats_db_name}.dual
|
||||||
WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository');
|
WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository'); /*EOS*/
|
||||||
DROP TABLE ${stats_db_name}.dual;
|
DROP TABLE ${stats_db_name}.dual; /*EOS*/
|
||||||
|
|
||||||
UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name = 'Unknown Repository';
|
UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name = 'Unknown Repository'; /*EOS*/
|
||||||
UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation = '-1';
|
UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation = '-1'; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS
|
||||||
SELECT substr(d.id, 4) AS id, langs.languages AS language
|
SELECT substr(d.id, 4) AS id, langs.languages AS language
|
||||||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
|
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
|
||||||
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
|
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
|
||||||
SELECT substr(d.id, 4) AS id, oids.ids AS oid
|
SELECT substr(d.id, 4) AS id, oids.ids AS oid
|
||||||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
|
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
|
||||||
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
|
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
|
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
|
||||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||||
FROM ${openaire_db_name}.relation r
|
FROM ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false;
|
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
-- datasource sources:
|
-- datasource sources:
|
||||||
-- where the datasource info have been collected from.
|
-- where the datasource info have been collected from.
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.datasource_sources STORED AS PARQUET AS
|
create table if not exists ${stats_db_name}.datasource_sources STORED AS PARQUET AS
|
||||||
select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
|
select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
|
||||||
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
|
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
|
||||||
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
|
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
|
||||||
SELECT datasource AS id, id AS result
|
SELECT datasource AS id, id AS result
|
||||||
FROM ${stats_db_name}.result_datasources;
|
FROM ${stats_db_name}.result_datasources; /*EOS*/
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
-- Organization table/view and Organization related tables/views
|
-- Organization table/view and Organization related tables/views
|
||||||
----------------------------------------------------------------
|
----------------------------------------------------------------
|
||||||
----------------------------------------------------------------
|
----------------------------------------------------------------
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.organization purge;
|
DROP TABLE IF EXISTS ${stats_db_name}.organization purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization STORED AS PARQUET AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization STORED AS PARQUET AS
|
||||||
SELECT substr(o.id, 4) as id,
|
SELECT substr(o.id, 4) as id,
|
||||||
|
@ -11,12 +11,12 @@ SELECT substr(o.id, 4) as id,
|
||||||
o.legalshortname.value as legalshortname,
|
o.legalshortname.value as legalshortname,
|
||||||
o.country.classid as country
|
o.country.classid as country
|
||||||
FROM ${openaire_db_name}.organization o
|
FROM ${openaire_db_name}.organization o
|
||||||
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE;
|
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS
|
||||||
SELECT organization AS id, id AS datasource
|
SELECT organization AS id, id AS datasource
|
||||||
FROM ${stats_db_name}.datasource_organizations;
|
FROM ${stats_db_name}.datasource_organizations; /*EOS*/
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS
|
||||||
SELECT id AS project, organization as id
|
SELECT id AS project, organization as id
|
||||||
FROM ${stats_db_name}.project_organizations;
|
FROM ${stats_db_name}.project_organizations; /*EOS*/
|
|
@ -154,180 +154,354 @@
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
<action name="Step1">
|
<action name="Step1">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step1.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step1</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
<ok to="Step2"/>
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
|
<!-- <ok to="Step2"/>-->
|
||||||
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step2">
|
<action name="Step2">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step2.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step2</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step3"/>
|
<ok to="Step3"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step3">
|
<action name="Step3">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step3.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step3</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step4"/>
|
<ok to="Step4"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step4">
|
<action name="Step4">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step4.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step4</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step5"/>
|
<ok to="Step5"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step5">
|
<action name="Step5">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step5.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step5</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step6"/>
|
<ok to="Step6"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step6">
|
<action name="Step6">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step6.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step6</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step7"/>
|
<ok to="Step7"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step7">
|
<action name="Step7">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step7.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step7</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step8"/>
|
<ok to="Step8"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step8">
|
<action name="Step8">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step8.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step8</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step9"/>
|
<ok to="Step9"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step9">
|
<action name="Step9">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step9.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step9</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step10"/>
|
<ok to="Step10"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step10">
|
<action name="Step10">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step10.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step10</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
<param>external_stats_db_name=${external_stats_db_name}</param>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
</hive2>
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
<!-- The hive2 action replaced here also passed external_stats_db_name to step10.sql; keep forwarding it so that placeholder can still be resolved. -->
|
||||||
|
<arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step11"/>
|
<ok to="Step11"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step11">
|
<action name="Step11">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step11.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step11</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
<param>external_stats_db_name=${external_stats_db_name}</param>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
</hive2>
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
<!-- The hive2 action replaced here also passed external_stats_db_name to step11.sql; keep forwarding it so that placeholder can still be resolved. -->
|
||||||
|
<arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step12"/>
|
<ok to="Step12"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step12">
|
<action name="Step12">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step12.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step12</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step13"/>
|
<ok to="Step13"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step13">
|
<action name="Step13">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step13.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step13</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step14"/>
|
<ok to="Step14"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step14">
|
<action name="Step14">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step14.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step14</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step15"/>
|
<ok to="Step15"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step15">
|
<action name="Step15">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step15.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step15</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step15_5"/>
|
<ok to="Step15_5"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step15_5">
|
<action name="Step15_5">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step15_5.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step15_5</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
<param>external_stats_db_name=${external_stats_db_name}</param>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
</hive2>
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
<!-- The hive2 action replaced here also passed external_stats_db_name to step15_5.sql; keep forwarding it so that placeholder can still be resolved. -->
|
||||||
|
<arg>--external_stats_db_name</arg><arg>${external_stats_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Contexts"/>
|
<ok to="Contexts"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
@ -379,23 +553,45 @@
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step16_1-definitions">
|
<action name="Step16_1-definitions">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step16_1-definitions.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step16_1-definitions</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step16_5"/>
|
<ok to="Step16_5"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step16_5">
|
<action name="Step16_5">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step16_5.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step16_5</name>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="Step19-finalize"/>
|
<ok to="Step19-finalize"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
@ -461,12 +657,23 @@
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="step21-createObservatoryDB">
|
<action name="step21-createObservatoryDB">
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
<master>yarn</master>
|
||||||
<script>scripts/step21-createObservatoryDB.sql</script>
|
<mode>cluster</mode>
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<name>Step21-createObservatoryDB</name>
|
||||||
<param>observatory_db_name=${observatory_db_name}</param>
|
<class>eu.dnetlib.dhp.oozie.RunSQLSparkJob</class>
|
||||||
</hive2>
|
<jar>dhp-stats-update-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
${sparkClusterOpts}
|
||||||
|
${sparkResourceOpts}
|
||||||
|
${sparkApplicationOpts}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql</arg>
|
||||||
|
<arg>--stats_db_name</arg><arg>${stats_db_name}</arg>
|
||||||
|
<arg>--openaire_db_name</arg><arg>${openaire_db_name}</arg>
|
||||||
|
<!-- The hive2 action replaced here passed observatory_db_name (not openaire_db_name) to this script; forward it as well so that placeholder can still be resolved. -->
|
||||||
|
<arg>--observatory_db_name</arg><arg>${observatory_db_name}</arg>
|
||||||
|
</spark>
|
||||||
<ok to="step21-createObservatoryDB-post"/>
|
<ok to="step21-createObservatoryDB-post"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
Loading…
Reference in New Issue