Merge pull request 'Added fields: totalcost, fundedamount, currency, in project table' (#257) from antonis.lempesis/dnet-hadoop:beta into beta

Reviewed-on: #257
2022-10-31 13:49:27 +01:00 · 2022-10-31 13:49:27 +01:00 · 22873c9172
parent 2b9a20a4a3 7861c472e0
commit 22873c9172
4 changed files with 14 additions and 8 deletions
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
@ -21,7 +21,7 @@
    </property>
    <property>
        <name>hive_jdbc_url</name>
-        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228</value>
    </property>
 	<property>
 		<name>oozie.wf.workflow.notification.url</name>
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
@ -42,7 +42,9 @@ SELECT p.id,
       CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END             AS delayedpubs,
       p.callidentifier,
       p.code,
-       p.totalcost
+       p.totalcost,
+       p.fundedamount,
+       p.currency
 FROM ${stats_db_name}.project_tmp p
         LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
                    FROM ${stats_db_name}.project_results pr
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
@ -59,7 +59,7 @@ UNION ALL
 SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;


-create table ${stats_db_name}.result_orcid STORED AS PARQUET as
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as
 select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
 from (
    SELECT substr(res.id, 4) as id, auth_pid.value as orcid
@ -69,7 +69,7 @@ from (
    LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
    WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res;

-create table ${stats_db_name}.result_result stored as parquet as
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
 select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype
 from ${openaire_db_name}.relation rel
 join ${openaire_db_name}.result r1 on rel.source=r1.id
@ -82,7 +82,7 @@ where reltype='resultResult'
    and r2.resulttype.classname != 'other'
    and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;

-create table ${stats_db_name}.result_citations_oc stored as parquet as
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
 select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
 from ${openaire_db_name}.relation rel
 join ${openaire_db_name}.result r1 on rel.source=r1.id
@ -97,7 +97,7 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
    and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
 group by substr(target, 4);

-create table ${stats_db_name}.result_references_oc stored as parquet as
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
 select substr(source, 4) as id, count(distinct substr(target, 4)) as references
 from ${openaire_db_name}.relation rel
         join ${openaire_db_name}.result r1 on rel.source=r1.id
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
@ -48,7 +48,9 @@ CREATE TABLE ${stats_db_name}.project_tmp
    delayedpubs    INT,
    callidentifier STRING,
    code           STRING,
-    totalcost       FLOAT
+    totalcost       FLOAT,
+    fundedamount    FLOAT,
+    currency        STRING
 ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');

 INSERT INTO ${stats_db_name}.project_tmp
@ -72,7 +74,9 @@ SELECT substr(p.id, 4)                                                 AS id,
       0                                                               AS delayedpubs,
       p.callidentifier.value                                          AS callidentifier,
       p.code.value                                                    AS code,
-       p.totalcost                                                     AS totalcost
+       p.totalcost                                                     AS totalcost,
+       p.fundedamount                                                  AS fundedamount,
+       p.currency.value                                                AS currency
 FROM ${openaire_db_name}.project p
 WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;