1
0
Fork 0

Every step is run in the analytics queue. The queue name is hardcoded for now and will be made a parameter later.

This commit is contained in:
Antonis Lempesis 2024-05-08 13:42:53 +03:00
parent 90a4fb3547
commit 0cada3cc8f
7 changed files with 18 additions and 6 deletions

View File

@ -1,3 +1,4 @@
set mapred.job.queue.name=analytics;
------------------------------------------------------ ------------------------------------------------------
------------------------------------------------------ ------------------------------------------------------
-- Additional relations -- Additional relations

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
------------------------------------------------------ ------------------------------------------------------
------------------------------------------------------ ------------------------------------------------------
-- Additional relations -- Additional relations
@ -104,4 +106,4 @@ rel.properties[1].value apc_currency
from ${openaire_db_name}.relation rel from ${openaire_db_name}.relation rel
join ${openaire_db_name}.organization o on o.id=rel.source join ${openaire_db_name}.organization o on o.id=rel.source
join ${openaire_db_name}.result r on r.id=rel.target join ${openaire_db_name}.result r on r.id=rel.target
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0; where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
------------------------------------------- -------------------------------------------
--- Extra tables, mostly used by indicators --- Extra tables, mostly used by indicators
@ -63,4 +65,4 @@ from (
join ${stats_db_name}.result res on res.id=r.id join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null; where r.amount is not null;
create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset; create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset;

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
---------------------------------------------------- ----------------------------------------------------
-- Shortcuts for various definitions in stats db --- -- Shortcuts for various definitions in stats db ---
---------------------------------------------------- ----------------------------------------------------
@ -25,4 +27,4 @@ drop table if exists ${stats_db_name}.result_gold purge;
create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as create table IF NOT EXISTS ${stats_db_name}.result_gold STORED AS PARQUET as
select r.id, case when gold.is_gold=1 then true else false end as gold select r.id, case when gold.is_gold=1 then true else false end as gold
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
-- replace the creation of the result view to include the boolean fields from the previous tables (green, gold, -- replace the creation of the result view to include the boolean fields from the previous tables (green, gold,
-- peer reviewed) -- peer reviewed)
drop table if exists ${stats_db_name}.result_tmp; drop table if exists ${stats_db_name}.result_tmp;
@ -53,4 +55,4 @@ LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id;
drop table if exists ${stats_db_name}.result; drop table if exists ${stats_db_name}.result;
drop view if exists ${stats_db_name}.result; drop view if exists ${stats_db_name}.result;
create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp; create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp;
drop table ${stats_db_name}.result_tmp; drop table ${stats_db_name}.result_tmp;

View File

@ -1,3 +1,5 @@
set mapred.job.queue.name=analytics;
-------------------------------------------------------------- --------------------------------------------------------------
-------------------------------------------------------------- --------------------------------------------------------------
-- Publication table/view and Publication related tables/views -- Publication table/view and Publication related tables/views
@ -111,4 +113,4 @@ SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type=
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
lateral view explode(p.extrainfo) citations AS citation lateral view explode(p.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;

View File

@ -368,6 +368,7 @@
${sparkClusterOpts} ${sparkClusterOpts}
${sparkResourceOpts} ${sparkResourceOpts}
${sparkApplicationOpts} ${sparkApplicationOpts}
--queue analytics
</spark-opts> </spark-opts>
<arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg> <arg>--hiveMetastoreUris</arg><arg>${hive_metastore_uris}</arg>
<arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql</arg> <arg>--sql</arg><arg>eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql</arg>
@ -551,4 +552,4 @@
</action> </action>
<end name="End"/> <end name="End"/>
</workflow-app> </workflow-app>