forked from D-Net/dnet-hadoop
not using impala since the resulting tables are not visible
This commit is contained in:
parent
dd3d6a6e15
commit
c9cfc165d9
|
@ -1,19 +1,7 @@
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
-- Shortcuts for various definitions in stats db ---
|
-- Shortcuts for various definitions in stats db ---
|
||||||
-- since these statements are executed using Impala,
|
|
||||||
-- we'll have to compute the stats for the tables we use
|
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
|
|
||||||
COMPUTE STATS result;
|
|
||||||
COMPUTE STATS result_sources;
|
|
||||||
COMPUTE STATS datasource;
|
|
||||||
COMPUTE STATS result_datasources;
|
|
||||||
COMPUTE STATS datasource_sources;
|
|
||||||
COMPUTE STATS country;
|
|
||||||
COMPUTE STATS result_organization;
|
|
||||||
COMPUTE STATS organization;
|
|
||||||
COMPUTE STATS datasource_organizations;
|
|
||||||
|
|
||||||
-- Peer reviewed:
|
-- Peer reviewed:
|
||||||
-- Results that have been collected from Crossref
|
-- Results that have been collected from Crossref
|
||||||
create table ${stats_db_name}.result_peerreviewed as
|
create table ${stats_db_name}.result_peerreviewed as
|
||||||
|
@ -28,9 +16,8 @@ from peer_reviewed
|
||||||
union all
|
union all
|
||||||
select distinct r.id as id, false as peer_reviewed
|
select distinct r.id as id, false as peer_reviewed
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
where r.id not in (select id from peer_reviewed);
|
left outer join peer_reviewed pr on pr.id=r.id
|
||||||
|
where pr.id is null;
|
||||||
COMPUTE STATS result_peerreviewed;
|
|
||||||
|
|
||||||
-- Green OA:
|
-- Green OA:
|
||||||
-- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal.
|
-- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal.
|
||||||
|
@ -40,20 +27,21 @@ with result_green as (
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||||
join ${stats_db_name}.datasource d on d.id=rd.datasource
|
join ${stats_db_name}.datasource d on d.id=rd.datasource
|
||||||
where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and not exists (
|
left outer join (
|
||||||
select 1 from ${stats_db_name}.result_datasources rd
|
select rd.id from ${stats_db_name}.result_datasources rd
|
||||||
join ${stats_db_name}.datasource d on rd.datasource=d.id
|
join ${stats_db_name}.datasource d on rd.datasource=d.id
|
||||||
join ${stats_db_name}.datasource_sources sds on sds.id=d.id
|
join ${stats_db_name}.datasource_sources sds on sds.id=d.id
|
||||||
join ${stats_db_name}.datasource sd on sd.id=sds.datasource
|
join ${stats_db_name}.datasource sd on sd.id=sds.datasource
|
||||||
where sd.name='DOAJ-ARTICLES' and rd.id=r.id))
|
where sd.name='DOAJ-ARTICLES'
|
||||||
|
) as doaj on doaj.id=r.id
|
||||||
|
where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and doaj.id is null)
|
||||||
select distinct result_green.id, true as green
|
select distinct result_green.id, true as green
|
||||||
from result_green
|
from result_green
|
||||||
union all
|
union all
|
||||||
select distinct r.id as id, false as green
|
select distinct r.id as id, false as green
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
where r.id not in (select id from result_green);
|
left outer join result_green rg on rg.id=r.id
|
||||||
|
where rg.id is null;
|
||||||
COMPUTE STATS result_greenoa;
|
|
||||||
|
|
||||||
-- GOLD OA:
|
-- GOLD OA:
|
||||||
-- OA results that have been harvested from a DOAJ journal.
|
-- OA results that have been harvested from a DOAJ journal.
|
||||||
|
@ -73,8 +61,6 @@ select distinct r.id, false as gold
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
where r.id not in (select id from result_gold);
|
where r.id not in (select id from result_gold);
|
||||||
|
|
||||||
COMPUTE STATS result_gold;
|
|
||||||
|
|
||||||
-- shortcut result-country through the organization affiliation
|
-- shortcut result-country through the organization affiliation
|
||||||
create table ${stats_db_name}.result_affiliated_country as
|
create table ${stats_db_name}.result_affiliated_country as
|
||||||
select r.id as id, o.country as country
|
select r.id as id, o.country as country
|
||||||
|
@ -83,8 +69,6 @@ join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||||
where o.country is not null and o.country!='';
|
where o.country is not null and o.country!='';
|
||||||
|
|
||||||
COMPUTE STATS result_affiliated_country;
|
|
||||||
|
|
||||||
-- shortcut result-country through datasource of deposition
|
-- shortcut result-country through datasource of deposition
|
||||||
create table ${stats_db_name}.result_deposited_country as
|
create table ${stats_db_name}.result_deposited_country as
|
||||||
select r.id as id, o.country as country
|
select r.id as id, o.country as country
|
||||||
|
@ -94,5 +78,3 @@ join ${stats_db_name}.datasource d on d.id=rd.datasource
|
||||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||||
where o.country is not null and o.country!='';
|
where o.country is not null and o.country!='';
|
||||||
|
|
||||||
COMPUTE STATS result_deposited_country;
|
|
|
@ -215,31 +215,17 @@
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
<param>stats_db_name=${stats_db_name}</param>
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
<param>openaire_db_name=${openaire_db_name}</param>
|
||||||
</hive2>
|
</hive2>
|
||||||
<ok to="Step15_5"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="Step15_5">
|
|
||||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
|
||||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
|
||||||
<script>scripts/step15_5.sql</script>
|
|
||||||
<param>stats_db_name=${stats_db_name}</param>
|
|
||||||
<param>openaire_db_name=${openaire_db_name}</param>
|
|
||||||
</hive2>
|
|
||||||
<ok to="Step16"/>
|
<ok to="Step16"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="Step16">
|
<action name="Step16">
|
||||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
||||||
<name-node>${nameNode}</name-node>
|
<script>scripts/step16.sql</script>
|
||||||
<exec>impala-shell.sh</exec>
|
<param>stats_db_name=${stats_db_name}</param>
|
||||||
<argument>${stats_db_name}</argument>
|
<param>openaire_db_name=${openaire_db_name}</param>
|
||||||
<argument>step16.sql</argument>
|
</hive2>
|
||||||
<argument>/user/${wf:user()}/oa/graph/stats/oozie_app/scripts/step16.sql</argument>
|
|
||||||
<file>impala-shell.sh</file>
|
|
||||||
</shell>
|
|
||||||
<ok to="Step16_5"/>
|
<ok to="Step16_5"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
Loading…
Reference in New Issue