forked from D-Net/dnet-hadoop
mergin with branch beta
This commit is contained in:
commit
476a4708d6
|
@ -76,6 +76,18 @@ public class IndexRecordTransformerTest {
|
|||
testRecordTransformation(record);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException {
|
||||
final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml"));
|
||||
testRecordTransformation(record);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForEOSCFutureTraining() throws IOException, TransformerException {
|
||||
final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml"));
|
||||
testRecordTransformation(record);
|
||||
}
|
||||
|
||||
private void testRecordTransformation(final String record) throws IOException, TransformerException {
|
||||
final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml"));
|
||||
final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"));
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
<record>
|
||||
<result xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<header>
|
||||
<dri:objIdentifier>r37b0ad08687::dec0d8520e726f2adda9a51280ac7299</dri:objIdentifier>
|
||||
<dri:dateOfCollection>2021-09-22T08:53:16Z</dri:dateOfCollection>
|
||||
<dri:status>under curation</dri:status>
|
||||
<counters />
|
||||
</header>
|
||||
<metadata>
|
||||
<oaf:entity xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://namespace.openaire.eu/oaf http://namespace.openaire.eu/oaf http://www.openaire.eu/schema/0.2/oaf-0.2.xsd">
|
||||
<oaf:result>
|
||||
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">EGI-Foundation/data-transfer-pilot: Include libraries in environment.yml</title>
|
||||
<creator rank="1" name="" surname="">Giuseppe La Rocca</creator>
|
||||
<creator rank="2" name="" surname="">Enol Fernández</creator>
|
||||
<creator rank="3" name="" surname="">Andrea Manzi</creator>
|
||||
<dateofacceptance />
|
||||
<resulttype classid="software" classname="software" schemeid="dnet:result_typologies" schemename="dnet:result_typologies" />
|
||||
<language classid="" classname="" schemeid="dnet:languages" schemename="dnet:languages" />
|
||||
<description>This notebook is used to demonstrate how a scientist from one of the PaNOSC RIs can use the resources provided by EGI to perform analysis on the data sets obtained during an expirement.</description>
|
||||
<country classid="" classname="" schemeid="" schemename="" />
|
||||
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">EOSC Jupyter Notebook</subject>
|
||||
<relevantdate classid="" classname="" schemeid="" schemename="" />
|
||||
<publisher>Zenodo</publisher>
|
||||
<embargoenddate />
|
||||
<journal issn="" eissn="" lissn="" ep="" iss="" sp="" vol="" />
|
||||
<source />
|
||||
<fulltext />
|
||||
<format />
|
||||
<storagedate />
|
||||
<resourcetype classid="" classname="" schemeid="" schemename="" />
|
||||
<device />
|
||||
<size />
|
||||
<version />
|
||||
<lastmetadataupdate />
|
||||
<metadataversionnumber />
|
||||
<documentationUrl />
|
||||
<codeRepositoryUrl />
|
||||
<programmingLanguage classid="" classname="" schemeid="" schemename="" />
|
||||
<contactperson />
|
||||
<contactgroup />
|
||||
<tool />
|
||||
<originalId>oai:zenodo.org:4218562</originalId>
|
||||
<collectedfrom name="Zenodo" id="re3data_____::7b0ad08687b2c960d5aeef06f811d5e6" />
|
||||
<pid classid="oai" classname="Open Archives Initiative" schemeid="dnet:pid_types" schemename="dnet:pid_types">oai:zenodo.org:4218562</pid>
|
||||
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types">10.5281/zenodo.4218562</pid>
|
||||
<bestaccessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
|
||||
<datainfo>
|
||||
<inferred>false</inferred>
|
||||
<deletedbyinference>false</deletedbyinference>
|
||||
<trust>0.9</trust>
|
||||
<inferenceprovenance />
|
||||
<provenanceaction classid="user:insert" classname="user:insert" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions" />
|
||||
</datainfo>
|
||||
<rels></rels>
|
||||
<children>
|
||||
<instance id="r37b0ad08687::dec0d8520e726f2adda9a51280ac7299">
|
||||
<instancetype classid="0029" classname="Software" schemeid="dnet:publication_resource" schemename="dnet:publication_resource" />
|
||||
<collectedfrom name="Zenodo" id="re3data_____::7b0ad08687b2c960d5aeef06f811d5e6" />
|
||||
<hostedby name="Zenodo" id="re3data_____::7b0ad08687b2c960d5aeef06f811d5e6" />
|
||||
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
|
||||
<dateofacceptance />
|
||||
<webresource>
|
||||
<url>https://zenodo.org/record/4218562</url>
|
||||
</webresource>
|
||||
</instance>
|
||||
</children>
|
||||
</oaf:result>
|
||||
</oaf:entity>
|
||||
</metadata>
|
||||
</result>
|
||||
</record>
|
|
@ -0,0 +1,71 @@
|
|||
<record>
|
||||
<result xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<header>
|
||||
<dri:objIdentifier>r37b0ad08687::eb430fb7438e1533ba95d6aa50a477eb</dri:objIdentifier>
|
||||
<dri:dateOfCollection>2021-09-22T08:53:13Z</dri:dateOfCollection>
|
||||
<dri:status>under curation</dri:status>
|
||||
<counters />
|
||||
</header>
|
||||
<metadata>
|
||||
<oaf:entity xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://namespace.openaire.eu/oaf http://namespace.openaire.eu/oaf http://www.openaire.eu/schema/0.2/oaf-0.2.xsd">
|
||||
|
||||
<oaf:result>
|
||||
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">EGI-Foundation/training-notebooks-seadatanet: Version 0.4</title>
|
||||
<creator rank="1" name="" surname="">Enol Fernández</creator>
|
||||
<dateofacceptance />
|
||||
<resulttype classid="software" classname="software" schemeid="dnet:result_typologies" schemename="dnet:result_typologies" />
|
||||
<language classid="" classname="" schemeid="dnet:languages" schemename="dnet:languages" />
|
||||
<description>A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/qVk6JWQ (run at EGI Notebooks service for easy access to data).This release updates the PID for the data.</description>
|
||||
<country classid="" classname="" schemeid="" schemename="" />
|
||||
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">EOSC Jupyter Notebook</subject>
|
||||
<relevantdate classid="" classname="" schemeid="" schemename="" />
|
||||
<publisher>Zenodo</publisher>
|
||||
<embargoenddate />
|
||||
<journal issn="" eissn="" lissn="" ep="" iss="" sp="" vol="" />
|
||||
<source />
|
||||
<fulltext />
|
||||
<format />
|
||||
<storagedate />
|
||||
<resourcetype classid="" classname="" schemeid="" schemename="" />
|
||||
<device />
|
||||
<size />
|
||||
<version />
|
||||
<lastmetadataupdate />
|
||||
<metadataversionnumber />
|
||||
<documentationUrl />
|
||||
<codeRepositoryUrl />
|
||||
<programmingLanguage classid="" classname="" schemeid="" schemename="" />
|
||||
<contactperson />
|
||||
<contactgroup />
|
||||
<tool />
|
||||
<originalId>oai:zenodo.org:3561323</originalId>
|
||||
<collectedfrom name="Zenodo" id="re3data_____::7b0ad08687b2c960d5aeef06f811d5e6" />
|
||||
<pid classid="oai" classname="Open Archives Initiative" schemeid="dnet:pid_types" schemename="dnet:pid_types">oai:zenodo.org:3561323</pid>
|
||||
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types">10.5281/zenodo.3561323</pid>
|
||||
<bestaccessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
|
||||
<datainfo>
|
||||
<inferred>false</inferred>
|
||||
<deletedbyinference>false</deletedbyinference>
|
||||
<trust>0.9</trust>
|
||||
<inferenceprovenance />
|
||||
<provenanceaction classid="user:insert" classname="user:insert" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions" />
|
||||
</datainfo>
|
||||
<rels></rels>
|
||||
<children>
|
||||
<instance id="r37b0ad08687::eb430fb7438e1533ba95d6aa50a477eb">
|
||||
<instancetype classid="0029" classname="Software" schemeid="dnet:publication_resource" schemename="dnet:publication_resource" />
|
||||
<collectedfrom name="Zenodo" id="re3data_____::7b0ad08687b2c960d5aeef06f811d5e6" />
|
||||
<hostedby name="Zenodo" id="re3data_____::7b0ad08687b2c960d5aeef06f811d5e6" />
|
||||
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
|
||||
<dateofacceptance />
|
||||
<webresource>
|
||||
<url>https://zenodo.org/record/3561323</url>
|
||||
</webresource>
|
||||
</instance>
|
||||
</children>
|
||||
</oaf:result>
|
||||
</oaf:entity>
|
||||
</metadata>
|
||||
</result>
|
||||
</record>
|
|
@ -15,7 +15,13 @@
|
|||
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityid" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classid"/>
|
||||
<FIELD indexable="true" multivalued="false" name="datasourcecompatibilityname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/openairecompatibility/@classname"/>
|
||||
<FIELD indexable="true" multivalued="true" name="datasourcesubject" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='datasource']/subjects"/>
|
||||
<FIELD indexable="true" name="versioning" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/versioning"/><!-- ORGANIZATION FIELDS -->
|
||||
<FIELD indexable="true" name="versioning" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/versioning"/>
|
||||
<!-- datasource fields for EOSC -->
|
||||
<FIELD indexable="true" name="datasourcejurisdiction" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/jurisdiction"/>
|
||||
<FIELD indexable="true" name="datasourcethematic" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/thematic"/>
|
||||
<FIELD indexable="true" name="datasourceknowledge_graph" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/knowledgegraph"/>
|
||||
<FIELD indexable="true" name="datasourcecontentpolicy" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='datasource']/contentpolicy"/>
|
||||
<!-- ORGANIZATION FIELDS -->
|
||||
<FIELD indexable="true" name="organizationlegalshortname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalshortname)"/>
|
||||
<FIELD indexable="true" name="organizationlegalname" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//legalname)"/>
|
||||
<FIELD indexable="true" name="organizationalternativenames" result="false" stat="false" type="ngramtext" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='organization']//alternativeNames)"/>
|
||||
|
@ -28,7 +34,8 @@
|
|||
<FIELD indexable="true" name="organizationecenterprise" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecenterprise"/>
|
||||
<FIELD indexable="true" name="organizationecsmevalidated" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecsmevalidated"/>
|
||||
<FIELD indexable="true" name="organizationecnutscode" result="false" stat="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/ecnutscode"/>
|
||||
<FIELD indexable="true" multivalued="false" name="organizationcountryname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/country/@classname"/><!-- PROJECT FIELDS -->
|
||||
<FIELD indexable="true" multivalued="false" name="organizationcountryname" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='organization']/country/@classname"/>
|
||||
<!-- PROJECT FIELDS -->
|
||||
<FIELD indexable="true" name="projectcode" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
|
||||
<FIELD indexable="true" name="projectcode_nt" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']/*[local-name()='project']/code"/>
|
||||
<FIELD indexable="true" name="projectacronym" result="false" stat="false" type="ngramtext" xpath="//*[local-name()='entity']/*[local-name()='project']/acronym"/>
|
||||
|
@ -79,6 +86,7 @@
|
|||
<FIELD indexable="true" multivalued="true" name="resultauthor_nt" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
|
||||
<FIELD indexable="true" multivalued="true" name="authorid" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']"/>
|
||||
<FIELD indexable="true" multivalued="true" name="authoridtype" result="false" stat="false" type="string_ci" xpath="//*[local-name()='entity']/*[local-name()='result']/creator/@*[local-name() != 'rank' and local-name() != 'name' and local-name() != 'surname']/local-name()"/>
|
||||
<FIELD indexable="true" multivalued="true" name="orcidtypevalue" result="false" stat="false" type="string_ci" value="string-join((./@*[local-name() = 'orcid' or local-name() = 'orcid_pending'], ./@*[local-name() = 'orcid' or local-name() = 'orcid_pending']/local-name()), '||' )" xpath="//*[local-name()='entity']/*[local-name()='result']/creator"/>
|
||||
<FIELD indexable="true" name="resulthostingdatasource" result="false" stat="false" tokenizable="false" value="distinct-values(concat(./@id, '||', ./@name))" xpath="//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']"/>
|
||||
<FIELD indexable="true" name="resulthostingdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']/@id)"/>
|
||||
<FIELD indexable="true" name="resulthostingdatasourcename" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*[local-name()='result']/children/instance/*[local-name()='hostedby']/@name)"/>
|
||||
|
@ -105,7 +113,7 @@
|
|||
<FIELD indexable="true" name="relorganizationname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalname)"/>
|
||||
<FIELD indexable="true" name="relorganizationshortname" result="false" stat="false" xpath="distinct-values(//*[local-name()='entity']/*//rel[./to/@type='organization']/legalshortname)"/>
|
||||
<FIELD indexable="true" name="relresultid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to[@type='result'])"/>
|
||||
<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@type)"/>
|
||||
<FIELD indexable="true" name="relresulttype" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/resulttype/@classid)"/>
|
||||
<FIELD indexable="true" name="relclass" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*//rel/to/@class)"/>
|
||||
<FIELD indexable="true" name="relfundinglevel0_id" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0"/>
|
||||
<FIELD indexable="true" name="relfundinglevel0_name" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//rel/funding/funding_level_0/@name/string()"/>
|
||||
|
@ -130,7 +138,7 @@
|
|||
<FIELD indexable="true" name="collectedfromdatasourceid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@id | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@id)"/>
|
||||
<FIELD indexable="true" name="collectedfromname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/*[local-name()='collectedfrom']/@name | //*[local-name()='entity']/*//*[local-name() = 'instance']/*[local-name()='collectedfrom']/@name)"/>
|
||||
<FIELD indexable="true" name="originalid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/*[local-name()='originalId']"/>
|
||||
<FIELD indexable="true" name="pid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="//*[local-name()='entity']/*/pid/text()"/>
|
||||
<FIELD indexable="true" name="pid" result="false" stat="false" tokenizable="false" type="string_ci" xpath="distinct-values(//*[local-name()='entity']/*/pid/text()|//*[local-name()='instance']/*[local-name()='alternateidentifier']/text())"/>
|
||||
<FIELD indexable="true" name="pidclassid" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classid)"/>
|
||||
<FIELD indexable="true" name="pidclassname" result="false" stat="false" tokenizable="false" xpath="distinct-values(//*[local-name()='entity']/*/pid/@classname)"/>
|
||||
<FIELD indexable="true" name="inferred" result="false" stat="false" tokenizable="false" xpath="//*[local-name()='entity']//datainfo/inferred"/>
|
||||
|
|
|
@ -16,7 +16,7 @@ curl -L ${CONTEXT_API}/contexts/?type=ri,community -H "accept: application/json"
|
|||
cat contexts.csv | cut -d , -f1 | xargs -I {} curl -L ${CONTEXT_API}/context/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split(":")[0]),\(.id),\(.label)"' > categories.csv
|
||||
cat categories.csv | cut -d , -f2 | sed 's/:/%3A/g'| xargs -I {} curl -L ${CONTEXT_API}/context/category/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split("::")[0])::\(.id|split("::")[1]),\(.id),\(.label)"' > concepts.csv
|
||||
cat contexts.csv | sed 's/^\(.*\),\(.*\)/\1,\1::other,\2/' >> categories.csv
|
||||
cat categories.csv | grep -v ::other | sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' >> concepts.csv
|
||||
cat categories.csv | sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' >> concepts.csv
|
||||
|
||||
echo "uploading context data to hdfs"
|
||||
hdfs dfs -mkdir ${TMP}
|
||||
|
|
|
@ -15,5 +15,5 @@ hdfs dfs -copyToLocal $SCRIPT_PATH
|
|||
echo "Creating indicators"
|
||||
impala-shell -q "invalidate metadata"
|
||||
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f -
|
||||
cat step16_7-createIndicatorsTables.sql | impala-shell -d $TARGET -f -
|
||||
cat step16-createIndicatorsTables.sql | impala-shell -d $TARGET -f -
|
||||
echo "Indicators created"
|
|
@ -9,16 +9,9 @@ fi
|
|||
export SOURCE=$1
|
||||
export TARGET=$2
|
||||
export SHADOW=$3
|
||||
export SCRIPT_PATH=$4
|
||||
|
||||
echo "Getting file from " $4
|
||||
hdfs dfs -copyToLocal $4
|
||||
|
||||
echo "Creating observatory database"
|
||||
impala-shell -q "drop database if exists ${TARGET} cascade"
|
||||
impala-shell -q "create database if not exists ${TARGET}"
|
||||
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
|
||||
cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f -
|
||||
impala-shell -q "invalidate metadata;"
|
||||
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f -
|
||||
echo "Impala shell finished"
|
||||
|
||||
echo "Updating shadow observatory database"
|
|
@ -0,0 +1,16 @@
|
|||
export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
|
||||
export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
|
||||
if ! [ -L $link_folder ]
|
||||
then
|
||||
rm -Rf "$link_folder"
|
||||
ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
|
||||
fi
|
||||
|
||||
export SOURCE=$1
|
||||
export TARGET=$2
|
||||
export SHADOW=$3
|
||||
|
||||
echo "Creating observatory database"
|
||||
impala-shell -q "drop database if exists ${TARGET} cascade"
|
||||
impala-shell -q "create database if not exists ${TARGET}"
|
||||
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
|
|
@ -23,6 +23,11 @@ CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
|
|||
SELECT *
|
||||
FROM ${external_stats_db_name}.rndexpediture;
|
||||
|
||||
CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS
|
||||
SELECT *
|
||||
FROM ${external_stats_db_name}.licenses_normalized;
|
||||
|
||||
|
||||
------------------------------------------------------------------------------------------------
|
||||
------------------------------------------------------------------------------------------------
|
||||
-- Creation date of the database
|
||||
|
|
|
@ -1,62 +0,0 @@
|
|||
----------------------------------------------------
|
||||
-- Shortcuts for various definitions in stats db ---
|
||||
----------------------------------------------------
|
||||
|
||||
-- Peer reviewed:
|
||||
-- Results that have been collected from Crossref
|
||||
create table ${stats_db_name}.result_peerreviewed as
|
||||
with peer_reviewed as (
|
||||
select distinct r.id as id
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_sources rs on rs.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rs.datasource
|
||||
where d.name='Crossref')
|
||||
select distinct peer_reviewed.id as id, true as peer_reviewed
|
||||
from peer_reviewed
|
||||
union all
|
||||
select distinct r.id as id, false as peer_reviewed
|
||||
from ${stats_db_name}.result r
|
||||
left outer join peer_reviewed pr on pr.id=r.id
|
||||
where pr.id is null;
|
||||
|
||||
-- Green OA:
|
||||
-- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal.
|
||||
create table ${stats_db_name}.result_greenoa as
|
||||
with result_green as (
|
||||
select distinct r.id as id
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource
|
||||
left outer join (
|
||||
select rd.id from ${stats_db_name}.result_datasources rd
|
||||
join ${stats_db_name}.datasource d on rd.datasource=d.id
|
||||
join ${stats_db_name}.datasource_sources sds on sds.id=d.id
|
||||
join ${stats_db_name}.datasource sd on sd.id=sds.datasource
|
||||
where sd.name='DOAJ-ARTICLES'
|
||||
) as doaj on doaj.id=r.id
|
||||
where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and doaj.id is null)
|
||||
select distinct result_green.id, true as green
|
||||
from result_green
|
||||
union all
|
||||
select distinct r.id as id, false as green
|
||||
from ${stats_db_name}.result r
|
||||
left outer join result_green rg on rg.id=r.id
|
||||
where rg.id is null;
|
||||
|
||||
-- GOLD OA:
|
||||
-- OA results that have been harvested from a DOAJ journal.
|
||||
create table ${stats_db_name}.result_gold as
|
||||
with result_gold as (
|
||||
select distinct r.id as id
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource
|
||||
join ${stats_db_name}.datasource_sources sds on sds.id=d.id
|
||||
join ${stats_db_name}.datasource sd on sd.id=sds.datasource
|
||||
where r.type='publication' and r.bestlicence='Open Access' and sd.name='DOAJ-Articles')
|
||||
select distinct result_gold.id, true as gold
|
||||
from result_gold
|
||||
union all
|
||||
select distinct r.id, false as gold
|
||||
from ${stats_db_name}.result r
|
||||
where r.id not in (select id from result_gold);
|
|
@ -0,0 +1,22 @@
|
|||
----------------------------------------------------
|
||||
-- Shortcuts for various definitions in stats db ---
|
||||
----------------------------------------------------
|
||||
|
||||
-- Peer reviewed:
|
||||
create table ${stats_db_name}.result_peerreviewed as
|
||||
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
||||
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
|
||||
|
||||
-- Green OA:
|
||||
create table ${stats_db_name}.result_greenoa as
|
||||
select r.id, case when green.green_oa=1 then true else false end as green
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
|
||||
|
||||
-- GOLD OA:
|
||||
create table ${stats_db_name}.result_gold as
|
||||
select r.id, case when gold.gold_oa=1 then true else false end as gold
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
|
|
@ -104,25 +104,42 @@ create table TARGET.project_results as select id as result, project as id from T
|
|||
compute stats TARGET.project_results;
|
||||
|
||||
-- indicators
|
||||
create table TARGET.indi_pub_green_oa as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_green_oa;
|
||||
|
||||
create table TARGET.indi_pub_grey_lit as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_grey_lit;
|
||||
|
||||
create table TARGET.indi_pub_doi_from_crossref as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_doi_from_crossref;
|
||||
|
||||
create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_gold_oa;
|
||||
|
||||
create view TARGET.indi_dataset_avg_year_content_oa as select * from SOURCE.indi_dataset_avg_year_content_oa orig;
|
||||
create view TARGET.indi_dataset_avg_year_context_oa as select * from SOURCE.indi_dataset_avg_year_context_oa orig;
|
||||
create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig;
|
||||
|
||||
create view TARGET.indi_other_avg_year_content_oa as select * from SOURCE.indi_other_avg_year_content_oa orig;
|
||||
create view TARGET.indi_other_avg_year_context_oa as select * from SOURCE.indi_other_avg_year_context_oa orig;
|
||||
create view TARGET.indi_other_avg_year_country_oa as select * from SOURCE.indi_other_avg_year_country_oa orig;
|
||||
|
||||
create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig;
|
||||
create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig;
|
||||
create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig;
|
||||
create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig;
|
||||
|
||||
create view TARGET.indi_pub_avg_year_content_oa as select * from SOURCE.indi_pub_avg_year_content_oa orig;
|
||||
create view TARGET.indi_pub_avg_year_context_oa as select * from SOURCE.indi_pub_avg_year_context_oa orig;
|
||||
create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig;
|
||||
|
||||
create table TARGET.indi_pub_green_oa as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_green_oa;
|
||||
create table TARGET.indi_pub_grey_lit as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_grey_lit;
|
||||
create table TARGET.indi_pub_doi_from_crossref as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_doi_from_crossref;
|
||||
create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_gold_oa;
|
||||
create table TARGET.indi_pub_has_abstract as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_has_abstract;
|
||||
create table TARGET.indi_pub_has_cc_licence as select * from SOURCE.indi_pub_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_has_cc_licence;
|
||||
create table TARGET.indi_pub_has_cc_licence_url as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_has_cc_licence_url;
|
||||
|
||||
create view TARGET.indi_software_avg_year_content_oa as select * from SOURCE.indi_software_avg_year_content_oa orig;
|
||||
create view TARGET.indi_software_avg_year_context_oa as select * from SOURCE.indi_software_avg_year_context_oa orig;
|
||||
create view TARGET.indi_software_avg_year_country_oa as select * from SOURCE.indi_software_avg_year_country_oa orig;
|
||||
|
||||
--denorm
|
||||
alter table TARGET.result rename to TARGET.res_tmp;
|
||||
|
||||
|
|
|
@ -1,259 +1,561 @@
|
|||
create table TARGET.result_affiliated_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_cc_licence stored as parquet as
|
||||
select r.id, coalesce(rln.count, 0) > 0 as cc_licence
|
||||
from ${stats_db_name}.result r
|
||||
left outer join (
|
||||
select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count
|
||||
from ${stats_db_name}.result_licenses rl
|
||||
left outer join ${stats_db_name}.licenses_normalized rln on rl.type=rln.license
|
||||
group by rl.id
|
||||
) rln on rln.id=r.id;
|
||||
|
||||
create table TARGET.result_affiliated_year stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year;
|
||||
create table ${observatory_db_name}.result_affiliated_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
||||
|
||||
create table TARGET.result_affiliated_year_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_affiliated_year stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
r.year
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
||||
|
||||
create table TARGET.result_affiliated_datasource stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, d.name as dname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_datasources rd on rd.id=r.id
|
||||
left outer join SOURCE.datasource d on d.id=rd.datasource
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name;
|
||||
create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
r.year, c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
||||
|
||||
create table TARGET.result_affiliated_datasource_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_datasources rd on rd.id=r.id
|
||||
left outer join SOURCE.datasource d on d.id=rd.datasource
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
d.name as dname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
left outer join ${stats_db_name}.datasource d on d.id=rd.datasource
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
||||
|
||||
create table TARGET.result_affiliated_organization stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, o.name as oname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name;
|
||||
create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
d.name as dname, c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
left outer join ${stats_db_name}.datasource d on d.id=rd.datasource
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
||||
|
||||
create table TARGET.result_affiliated_organization_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_affiliated_organization stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
o.name as oname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
||||
|
||||
create table TARGET.result_affiliated_funder stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, p.funder as pfunder
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
join SOURCE.result_projects rp on rp.id=r.id
|
||||
join SOURCE.project p on p.id=rp.project
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder;
|
||||
create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
o.name as oname, c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
||||
|
||||
create table TARGET.result_affiliated_funder_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_organization ro on ro.id=r.id
|
||||
join SOURCE.organization o on o.id=ro.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
join SOURCE.result_projects rp on rp.id=r.id
|
||||
join SOURCE.project p on p.id=rp.project
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_affiliated_funder stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
p.funder as pfunder
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||
join ${stats_db_name}.project p on p.id=rp.project
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
||||
|
||||
create table TARGET.result_deposited_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
p.funder as pfunder, c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_organization ro on ro.id=r.id
|
||||
join ${stats_db_name}.organization o on o.id=ro.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||
join ${stats_db_name}.project p on p.id=rp.project
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
||||
|
||||
create table TARGET.result_deposited_year stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year;
|
||||
create table ${observatory_db_name}.result_deposited_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
|
||||
|
||||
create table TARGET.result_deposited_year_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_deposited_year stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
r.year
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
|
||||
|
||||
create table TARGET.result_deposited_datasource stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, d.name as dname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name;
|
||||
create table ${observatory_db_name}.result_deposited_year_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
r.year, c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
|
||||
|
||||
create table TARGET.result_deposited_datasource_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_deposited_datasource stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
d.name as dname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
|
||||
|
||||
create table TARGET.result_deposited_organization stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, o.name as oname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name;
|
||||
create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
d.name as dname, c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
|
||||
|
||||
create table TARGET.result_deposited_organization_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_deposited_organization stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
o.name as oname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
|
||||
|
||||
create table TARGET.result_deposited_funder stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, p.funder as pfunder
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
join SOURCE.result_projects rp on rp.id=r.id
|
||||
join SOURCE.project p on p.id=rp.project
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder;
|
||||
create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
o.name as oname, c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
|
||||
|
||||
create table TARGET.result_deposited_funder_country stored as parquet as
|
||||
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
|
||||
from SOURCE.result r
|
||||
join SOURCE.result_datasources rd on rd.id=r.id
|
||||
join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join SOURCE.datasource_organizations dor on dor.id=d.id
|
||||
join SOURCE.organization o on o.id=dor.organization
|
||||
join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
|
||||
join SOURCE.result_projects rp on rp.id=r.id
|
||||
join SOURCE.project p on p.id=rp.project
|
||||
left outer join SOURCE.result_licenses rl on rl.id=r.id
|
||||
left outer join SOURCE.result_pids pids on pids.id=r.id
|
||||
group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name;
|
||||
create table ${observatory_db_name}.result_deposited_funder stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
p.funder as pfunder
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||
join ${stats_db_name}.project p on p.id=rp.project
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
|
||||
|
||||
compute stats TARGET.result_affiliated_country;
|
||||
compute stats TARGET.result_affiliated_year;
|
||||
compute stats TARGET.result_affiliated_year_country;
|
||||
compute stats TARGET.result_affiliated_datasource;
|
||||
compute stats TARGET.result_affiliated_datasource_country;
|
||||
compute stats TARGET.result_affiliated_organization;
|
||||
compute stats TARGET.result_affiliated_organization_country;
|
||||
compute stats TARGET.result_affiliated_funder;
|
||||
compute stats TARGET.result_affiliated_funder_country;
|
||||
compute stats TARGET.result_deposited_country;
|
||||
compute stats TARGET.result_deposited_year;
|
||||
compute stats TARGET.result_deposited_year_country;
|
||||
compute stats TARGET.result_deposited_datasource;
|
||||
compute stats TARGET.result_deposited_datasource_country;
|
||||
compute stats TARGET.result_deposited_organization;
|
||||
compute stats TARGET.result_deposited_organization_country;
|
||||
compute stats TARGET.result_deposited_funder;
|
||||
compute stats TARGET.result_deposited_funder_country;
|
||||
create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as
|
||||
select
|
||||
count(distinct r.id) as total,
|
||||
r.green,
|
||||
r.gold,
|
||||
case when rl.type is not null then true else false end as licence,
|
||||
case when pids.pid is not null then true else false end as pid,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
|
||||
r.peer_reviewed,
|
||||
rln.cc_licence,
|
||||
r.abstract as abstract,
|
||||
r.authors > 1 as multiple_authors,
|
||||
rpc.count > 1 as multiple_projects,
|
||||
rfc.count > 1 as multiple_funders,
|
||||
r.type,
|
||||
p.funder as pfunder, c.code as ccode, c.name as cname
|
||||
from ${stats_db_name}.result r
|
||||
join ${stats_db_name}.result_datasources rd on rd.id=r.id
|
||||
join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
|
||||
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
|
||||
join ${stats_db_name}.organization o on o.id=dor.organization
|
||||
join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
|
||||
join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||
join ${stats_db_name}.project p on p.id=rp.project
|
||||
left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
|
||||
left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
|
||||
left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
|
||||
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
|
||||
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
|
||||
group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
|
||||
case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
|
||||
cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
|
|
@ -239,14 +239,51 @@
|
|||
<param>stats_db_name=${stats_db_name}</param>
|
||||
<param>openaire_db_name=${openaire_db_name}</param>
|
||||
</hive2>
|
||||
<ok to="Step16"/>
|
||||
<ok to="Step15_5"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step16">
|
||||
<action name="Step15_5">
|
||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
||||
<script>scripts/step16.sql</script>
|
||||
<script>scripts/step15_5.sql</script>
|
||||
<param>stats_db_name=${stats_db_name}</param>
|
||||
<param>openaire_db_name=${openaire_db_name}</param>
|
||||
</hive2>
|
||||
<ok to="Contexts"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Contexts">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>contexts.sh</exec>
|
||||
<argument>${context_api_url}</argument>
|
||||
<argument>${stats_db_name}</argument>
|
||||
<file>contexts.sh</file>
|
||||
</shell>
|
||||
<ok to="Step16-createIndicatorsTables"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step16-createIndicatorsTables">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>indicators.sh</exec>
|
||||
<argument>${stats_db_name}</argument>
|
||||
<argument>${wf:appPath()}/scripts/step16-createIndicatorsTables.sql</argument>
|
||||
<file>indicators.sh</file>
|
||||
</shell>
|
||||
<ok to="Step16_1-definitions"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step16_1-definitions">
|
||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
||||
<script>scripts/step16_1-definitions.sql</script>
|
||||
<param>stats_db_name=${stats_db_name}</param>
|
||||
<param>openaire_db_name=${openaire_db_name}</param>
|
||||
</hive2>
|
||||
|
@ -261,48 +298,11 @@
|
|||
<param>stats_db_name=${stats_db_name}</param>
|
||||
<param>openaire_db_name=${openaire_db_name}</param>
|
||||
</hive2>
|
||||
<ok to="Step16_6"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step16_6">
|
||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
||||
<script>scripts/step16_6.sql</script>
|
||||
<param>stats_db_name=${stats_db_name}</param>
|
||||
<param>openaire_db_name=${openaire_db_name}</param>
|
||||
</hive2>
|
||||
<ok to="Step16_7-createIndicatorsTables"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step16_7-createIndicatorsTables">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>indicators.sh</exec>
|
||||
<argument>${stats_db_name}</argument>
|
||||
<argument>${wf:appPath()}/scripts/step16_7-createIndicatorsTables.sql</argument>
|
||||
<file>indicators.sh</file>
|
||||
</shell>
|
||||
<ok to="Step17"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step17">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>contexts.sh</exec>
|
||||
<argument>${context_api_url}</argument>
|
||||
<argument>${stats_db_name}</argument>
|
||||
<file>contexts.sh</file>
|
||||
</shell>
|
||||
<ok to="Step19"/>
|
||||
<ok to="Step19-finalize"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Step19">
|
||||
<action name="Step19-finalize">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
|
@ -326,20 +326,44 @@
|
|||
<argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
|
||||
<file>monitor.sh</file>
|
||||
</shell>
|
||||
<ok to="step21-createObservatoryDB-pre"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="step21-createObservatoryDB-pre">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>observatory-pre.sh</exec>
|
||||
<argument>${stats_db_name}</argument>
|
||||
<argument>${observatory_db_name}</argument>
|
||||
<argument>${observatory_db_shadow_name}</argument>
|
||||
<file>observatory-pre.sh</file>
|
||||
</shell>
|
||||
<ok to="step21-createObservatoryDB"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="step21-createObservatoryDB">
|
||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
||||
<jdbc-url>${hive_jdbc_url}</jdbc-url>
|
||||
<script>scripts/step21-createObservatoryDB.sql</script>
|
||||
<param>stats_db_name=${stats_db_name}</param>
|
||||
<param>observatory_db_name=${observatory_db_name}</param>
|
||||
</hive2>
|
||||
<ok to="step21-createObservatoryDB-post"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="step21-createObservatoryDB-post">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>observatory.sh</exec>
|
||||
<exec>observatory-post.sh</exec>
|
||||
<argument>${stats_db_name}</argument>
|
||||
<argument>${observatory_db_name}</argument>
|
||||
<argument>${observatory_db_shadow_name}</argument>
|
||||
<argument>${wf:appPath()}/scripts/step21-createObservatoryDB.sql</argument>
|
||||
<file>observatory.sh</file>
|
||||
<file>observatory-post.sh</file>
|
||||
</shell>
|
||||
<ok to="Step22"/>
|
||||
<error to="Kill"/>
|
||||
|
|
Loading…
Reference in New Issue