Various fixes in the stats workflow #430

Merged
claudio.atzori merged 12 commits from antonis.lempesis/dnet-hadoop:beta into beta 2024-05-08 13:41:03 +02:00
7 changed files with 18 additions and 6 deletions
Showing only changes of commit 0cada3cc8f - Show all commits

View File

@ -1,3 +1,4 @@
set mapred.job.queue.name=analytics;
------------------------------------------------------
------------------------------------------------------
-- Additional relations

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
------------------------------------------------------
------------------------------------------------------
-- Additional relations
@ -104,4 +106,4 @@ rel.properties[1].value apc_currency
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.organization o on o.id=rel.source
join ${openaire_db_name}.result r on r.id=rel.target
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
-------------------------------------------
--- Extra tables, mostly used by indicators
@ -63,4 +65,4 @@ from (
join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null;
create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset;
create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset;

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
----------------------------------------------------
-- Shortcuts for various definitions in stats db ---
----------------------------------------------------
@ -25,4 +27,4 @@ drop table if exists ${stats_db_name}.result_gold purge;
create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as
select r.id, case when gold.is_gold=1 then true else false end as gold
from ${stats_db_name}.result r
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
-- replace the creation of the result view to include the boolean fields from the previous tables (green, gold,
-- peer reviewed)
drop table if exists ${stats_db_name}.result_tmp;
@ -53,4 +55,4 @@ LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
drop table if exists ${stats_db_name}.result;
drop view if exists ${stats_db_name}.result;
create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp;
drop table ${stats_db_name}.result_tmp;
drop table ${stats_db_name}.result_tmp;

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
--------------------------------------------------------------
--------------------------------------------------------------
-- Publication table/view and Publication related tables/views
@ -111,4 +113,4 @@ SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type=
FROM ${openaire_db_name}.publication p
lateral view explode(p.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;

View File

@ -368,6 +368,7 @@
${sparkClusterOpts}
${sparkResourceOpts}
${sparkApplicationOpts}
--queue analytics
</spark-opts>
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql</arg>
@ -551,4 +552,4 @@
</action>
<end name="End"/>
</workflow-app>
</workflow-app>