forked from D-Net/dnet-hadoop
added result_orcid, result_project provenance, issn in datasources
This commit is contained in:
parent
1a28a69cac
commit
4a9741825d
|
@ -57,12 +57,14 @@ UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.software_sources
|
SELECT * FROM ${stats_db_name}.software_sources
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
||||||
--
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS FOR COLUMNS;
|
create table ${stats_db_name}.result_orcid as
|
||||||
-- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS;
|
-- Strip the ORCID URL prefix (http or https scheme) so that only the bare identifier is stored
select distinct res.id, regexp_replace(res.orcid, 'https?://orcid\\.org/' ,'') as orcid
|
||||||
-- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS FOR COLUMNS;
|
from (
|
||||||
-- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS;
|
SELECT substr(res.id, 4) as id, auth_pid.value as orcid
|
||||||
-- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS FOR COLUMNS;
|
FROM ${openaire_db_name}.result res
|
||||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS;
|
LATERAL VIEW explode(author) a as auth
|
||||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS FOR COLUMNS;
|
LATERAL VIEW explode(auth.pid) ap as auth_pid
|
||||||
|
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
|
||||||
|
-- Filter on the qualifier of the exploded pid itself. The separate author_pid_type lateral view
-- is a cross product over all pids of the author, so testing it would also keep non-ORCID pid values.
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and auth_pid.qualifier.classid = 'orcid') as res
|
|
@ -33,13 +33,4 @@ select * from ${stats_db_name}.dataset_refereed
|
||||||
union all
|
union all
|
||||||
select * from ${stats_db_name}.software_refereed
|
select * from ${stats_db_name}.software_refereed
|
||||||
union all
|
union all
|
||||||
select * from ${stats_db_name}.otherresearchproduct_refereed;
|
select * from ${stats_db_name}.otherresearchproduct_refereed;
|
||||||
--
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS FOR COLUMNS;
|
|
|
@ -13,7 +13,7 @@ WHERE r.reltype = 'projectOrganization'
|
||||||
and r.datainfo.deletedbyinference = false;
|
and r.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.project_results AS
|
CREATE TABLE ${stats_db_name}.project_results AS
|
||||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result
|
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||||
FROM ${openaire_db_name}.relation r
|
FROM ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'resultProject'
|
WHERE r.reltype = 'resultProject'
|
||||||
and r.datainfo.deletedbyinference = false;
|
and r.datainfo.deletedbyinference = false;
|
||||||
|
|
|
@ -130,12 +130,7 @@ WHERE r.reltype = 'resultOrganization'
|
||||||
and r.datainfo.deletedbyinference = false;
|
and r.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.result_projects AS
|
CREATE TABLE ${stats_db_name}.result_projects AS
|
||||||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend
|
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||||
FROM ${stats_db_name}.result r
|
FROM ${stats_db_name}.result r
|
||||||
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
||||||
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
||||||
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS FOR COLUMNS;
|
|
|
@ -17,7 +17,9 @@ CREATE TABLE ${stats_db_name}.datasource_tmp
|
||||||
`latitude` STRING,
|
`latitude` STRING,
|
||||||
`longitude` STRING,
|
`longitude` STRING,
|
||||||
`websiteurl` STRING,
|
`websiteurl` STRING,
|
||||||
`compatibility` STRING
|
`compatibility` STRING,
|
||||||
|
issn_printed STRING,
|
||||||
|
issn_online STRING
|
||||||
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
||||||
|
|
||||||
-- Insert statement that takes into account the piwik_id of the openAIRE graph
|
-- Insert statement that takes into account the piwik_id of the openAIRE graph
|
||||||
|
@ -32,7 +34,9 @@ SELECT substr(d1.id, 4) AS id,
|
||||||
d1.latitude.value AS latitude,
|
d1.latitude.value AS latitude,
|
||||||
d1.longitude.value AS longitude,
|
d1.longitude.value AS longitude,
|
||||||
d1.websiteurl.value AS websiteurl,
|
d1.websiteurl.value AS websiteurl,
|
||||||
d1.openairecompatibility.classid AS compatibility
|
d1.openairecompatibility.classid AS compatibility,
|
||||||
|
d1.journal.issnprinted AS issn_printed,
|
||||||
|
d1.journal.issnonline AS issn_online
|
||||||
FROM ${openaire_db_name}.datasource d1
|
FROM ${openaire_db_name}.datasource d1
|
||||||
LEFT OUTER JOIN
|
LEFT OUTER JOIN
|
||||||
(SELECT id, split(originalidd, '\\:')[1] as piwik_id
|
(SELECT id, split(originalidd, '\\:')[1] as piwik_id
|
||||||
|
@ -97,13 +101,4 @@ where d.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
|
||||||
SELECT datasource AS id, id AS result
|
SELECT datasource AS id, id AS result
|
||||||
FROM ${stats_db_name}.result_datasources;
|
FROM ${stats_db_name}.result_datasources;
|
||||||
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS FOR COLUMNS;
|
|
Loading…
Reference in New Issue