forked from D-Net/dnet-hadoop
added result_orcid, result_project provenance, issn in datasources
This commit is contained in:
parent
1a28a69cac
commit
4a9741825d
|
@ -57,12 +57,14 @@ UNION ALL
|
|||
SELECT * FROM ${stats_db_name}.software_sources
|
||||
UNION ALL
|
||||
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
||||
--
|
||||
-- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS FOR COLUMNS;
|
||||
|
||||
|
||||
create table ${stats_db_name}.result_orcid as
|
||||
select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
|
||||
from (
|
||||
SELECT substr(res.id, 4) as id, auth_pid.value as orcid
|
||||
FROM ${openaire_db_name}.result res
|
||||
LATERAL VIEW explode(author) a as auth
|
||||
LATERAL VIEW explode(auth.pid) ap as auth_pid
|
||||
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
|
||||
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res
|
|
@ -33,13 +33,4 @@ select * from ${stats_db_name}.dataset_refereed
|
|||
union all
|
||||
select * from ${stats_db_name}.software_refereed
|
||||
union all
|
||||
select * from ${stats_db_name}.otherresearchproduct_refereed;
|
||||
--
|
||||
-- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS FOR COLUMNS;
|
||||
select * from ${stats_db_name}.otherresearchproduct_refereed;
|
|
@ -13,7 +13,7 @@ WHERE r.reltype = 'projectOrganization'
|
|||
and r.datainfo.deletedbyinference = false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_results AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'resultProject'
|
||||
and r.datainfo.deletedbyinference = false;
|
||||
|
|
|
@ -130,12 +130,7 @@ WHERE r.reltype = 'resultOrganization'
|
|||
and r.datainfo.deletedbyinference = false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.result_projects AS
|
||||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend
|
||||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||
FROM ${stats_db_name}.result r
|
||||
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
||||
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
||||
|
||||
-- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS FOR COLUMNS;
|
||||
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
|
@ -17,7 +17,9 @@ CREATE TABLE ${stats_db_name}.datasource_tmp
|
|||
`latitude` STRING,
|
||||
`longitude` STRING,
|
||||
`websiteurl` STRING,
|
||||
`compatibility` STRING
|
||||
`compatibility` STRING,
|
||||
issn_printed STRING,
|
||||
issn_online STRING
|
||||
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
|
||||
|
||||
-- Insert statement that takes into account the piwik_id of the openAIRE graph
|
||||
|
@ -32,7 +34,9 @@ SELECT substr(d1.id, 4) AS id,
|
|||
d1.latitude.value AS latitude,
|
||||
d1.longitude.value AS longitude,
|
||||
d1.websiteurl.value AS websiteurl,
|
||||
d1.openairecompatibility.classid AS compatibility
|
||||
d1.openairecompatibility.classid AS compatibility,
|
||||
d1.journal.issnprinted AS issn_printed,
|
||||
d1.journal.issnonline AS issn_online
|
||||
FROM ${openaire_db_name}.datasource d1
|
||||
LEFT OUTER JOIN
|
||||
(SELECT id, split(originalidd, '\\:')[1] as piwik_id
|
||||
|
@ -97,13 +101,4 @@ where d.datainfo.deletedbyinference = false;
|
|||
|
||||
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
|
||||
SELECT datasource AS id, id AS result
|
||||
FROM ${stats_db_name}.result_datasources;
|
||||
|
||||
-- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS FOR COLUMNS;
|
||||
FROM ${stats_db_name}.result_datasources;
|
Loading…
Reference in New Issue