
Merge branch 'beta' into pubmed_update

Claudio Atzori 2022-07-25 14:10:22 +02:00
commit c3ede1b379
47 changed files with 1928 additions and 595 deletions

View File

@@ -191,7 +191,7 @@ public class ZenodoAPIClient implements Serializable {
* @throws MissingConceptDoiException
*/
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
setDepositionId(concept_rec_id);
setDepositionId(concept_rec_id, 1);
String json = "{}";
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
@@ -253,9 +253,10 @@ public class ZenodoAPIClient implements Serializable {
}
private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
ZenodoModelList zenodoModelList = new Gson()
.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
for (ZenodoModel zm : zenodoModelList) {
if (zm.getConceptrecid().equals(concept_rec_id)) {
@@ -263,16 +264,23 @@ public class ZenodoAPIClient implements Serializable {
return;
}
}
throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
if (zenodoModelList.size() == 0)
throw new MissingConceptDoiException(
"The concept record id specified was missing in the list of depositions");
setDepositionId(concept_rec_id, page + 1);
}
private String getPrevDepositions() throws IOException {
private String getPrevDepositions(String page) throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
urlBuilder.addQueryParameter("page", page);
String url = urlBuilder.build().toString();
Request request = new Request.Builder()
.url(urlString)
.url(url)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get()
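
The hunks above make the Zenodo deposition lookup paginated: `newVersion` now starts the search at page 1, `getPrevDepositions` takes the page number as a query parameter, and `setDepositionId` recurses to the next page until the concept record id turns up, throwing `MissingConceptDoiException` only once a page comes back empty. A minimal sketch of the same pattern, assuming a `fetchPage` helper and a `Deposition` stand-in invented here for illustration:

```java
import java.io.IOException;
import java.util.List;

public class PaginatedDepositionLookup {

	// Minimal stand-in for ZenodoModel: only the fields the lookup needs.
	static class Deposition {
		String conceptrecid;
		String id;
	}

	/**
	 * Mirrors setDepositionId(concept_rec_id, page): match -> done,
	 * empty page -> give up, otherwise recurse on page + 1.
	 */
	static String findDepositionId(String conceptRecId, int page) throws IOException {
		List<Deposition> depositions = fetchPage(page); // assumed GET ...?page=<page>
		for (Deposition d : depositions) {
			if (d.conceptrecid.equals(conceptRecId)) {
				return d.id;
			}
		}
		if (depositions.isEmpty()) {
			// ran past the last page without a match
			throw new IOException("concept record id not found: " + conceptRecId);
		}
		return findDepositionId(conceptRecId, page + 1);
	}

	// Placeholder for the paginated HTTP call made by getPrevDepositions(page).
	static List<Deposition> fetchPage(int page) throws IOException {
		throw new UnsupportedOperationException("HTTP call omitted in this sketch");
	}
}
```

Note that the recursion terminates only because the API eventually returns an empty page; a loop with an explicit page cap would be a more defensive variant of the same logic.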

View File

@@ -19,7 +19,9 @@ import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.junit.jupiter.MockitoExtension;
@@ -50,7 +52,7 @@ class TransformationJobTest extends AbstractVocabularyTest {
@Test
@DisplayName("Test Date cleaner")
void testDateCleaner() throws Exception {
DateCleaner dc = new DateCleaner();
final DateCleaner dc = new DateCleaner();
assertEquals("1982-09-20", dc.clean("20/09/1982"));
assertEquals("2002-09-20", dc.clean("20-09-2002"));
assertEquals("2002-09-20", dc.clean("2002-09-20"));
@@ -68,9 +70,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_zenodo.xml")));
// We Load the XSLT transformation Rule from the classpath
XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/zenodo_tr.xslt");
final XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/zenodo_tr.xslt");
MetadataRecord result = tr.call(mr);
final MetadataRecord result = tr.call(mr);
// Print the record
System.out.println(result.getBody());
@@ -86,9 +88,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_itgv4.xml")));
// We Load the XSLT transformation Rule from the classpath
XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/zenodo_tr.xslt");
final XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/zenodo_tr.xslt");
MetadataRecord result = tr.call(mr);
final MetadataRecord result = tr.call(mr);
// Print the record
System.out.println(result.getBody());
@@ -108,9 +110,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_itgv4.xml")));
// We Load the XSLT transformation Rule from the classpath
XSLTTransformationFunction tr = loadTransformationRule(xslTransformationScript);
final XSLTTransformationFunction tr = loadTransformationRule(xslTransformationScript);
MetadataRecord result = tr.call(mr);
final MetadataRecord result = tr.call(mr);
// Print the record
System.out.println(result.getBody());
@@ -129,9 +131,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_omicsdi.xml")));
// We Load the XSLT transformation Rule from the classpath
XSLTTransformationFunction tr = loadTransformationRule(xslTransformationScript);
final XSLTTransformationFunction tr = loadTransformationRule(xslTransformationScript);
MetadataRecord result = tr.call(mr);
final MetadataRecord result = tr.call(mr);
// Print the record
System.out.println(result.getBody());
@@ -140,7 +142,8 @@ class TransformationJobTest extends AbstractVocabularyTest {
@Test
@DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)")
void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception {
void transformTestITGv4OAIdatacite(@TempDir
final Path testDir) throws Exception {
try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) {
@@ -203,7 +206,8 @@ class TransformationJobTest extends AbstractVocabularyTest {
@Test
@DisplayName("Test TransformSparkJobNode.main")
void transformTest(@TempDir Path testDir) throws Exception {
void transformTest(@TempDir
final Path testDir) throws Exception {
try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) {
@@ -256,6 +260,25 @@ class TransformationJobTest extends AbstractVocabularyTest {
}
}
@Test
@DisplayName("Test Transform Single XML using cnr_explora_tr XSLTTransformator")
void testCnrExploraTransformSaxonHE() throws Exception {
// We Set the input Record getting the XML from the classpath
final MetadataRecord mr = new MetadataRecord();
mr.setProvenance(new Provenance("openaire____::cnr_explora", "CNR ExploRA", "cnr_________"));
mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_cnr_explora.xml")));
// We Load the XSLT transformation Rule from the classpath
final XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/cnr_explora_tr.xslt");
final MetadataRecord result = tr.call(mr);
// Print the record
System.out.println(result.getBody());
// TODO Create significant Assert
}
private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
final String trValue = IOUtils.toString(this.getClass().getResourceAsStream(path));
final LongAccumulator la = new LongAccumulator();

View File

@@ -0,0 +1,214 @@
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://purl.org/dc/elements/1.1/"
exclude-result-prefixes="xsl vocabulary dateCleaner" version="2.0">
<xsl:param name="varOfficialName" />
<xsl:param name="varDataSourceId" />
<xsl:param name="varFP7" select="'corda_______::'" />
<xsl:param name="varH2020" select="'corda__h2020::'" />
<xsl:param name="repoCode"
select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
<xsl:param name="index" select="0" />
<xsl:param name="transDate" select="current-dateTime()" />
<xsl:template match="/">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
<metadata>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:title" />
<xsl:with-param name="targetElement" select="'dc:title'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:creator/replace(., '^(.*)\|.*$', '$1')" />
<xsl:with-param name="targetElement" select="'dc:creator'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:contributor" />
<xsl:with-param name="targetElement" select="'dc:contributor'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:description" />
<xsl:with-param name="targetElement" select="'dc:description'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:subject" />
<xsl:with-param name="targetElement" select="'dc:subject'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:publisher" />
<xsl:with-param name="targetElement" select="'dc:publisher'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:format" />
<xsl:with-param name="targetElement" select="'dc:format'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:type" />
<xsl:with-param name="targetElement" select="'dc:type'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:source" />
<xsl:with-param name="targetElement" select="'dc:source'" />
</xsl:call-template>
<dc:language>
<xsl:value-of select="vocabulary:clean( //dc:language, 'dnet:languages')" />
</dc:language>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:rights" />
<xsl:with-param name="targetElement" select="'dc:rights'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:relation[not(starts-with(.,'info:cnr-pdr'))]" />
<xsl:with-param name="targetElement" select="'dc:relation'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:identifier[starts-with(., 'http')]" />
<xsl:with-param name="targetElement" select="'dc:identifier'" />
</xsl:call-template>
<xsl:for-each select="//dc:relation">
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
<oaf:projectid>
<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
</oaf:projectid>
</xsl:if>
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
<oaf:projectid>
<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
</oaf:projectid>
</xsl:if>
</xsl:for-each>
<oaf:accessrights>
<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
</oaf:accessrights>
<xsl:variable name="varCobjCategory" select="vocabulary:clean( //dc:type, 'dnet:publication_resource')" />
<xsl:variable name="varSuperType" select="vocabulary:clean( $varCobjCategory, 'dnet:result_typologies')" />
<dr:CobjCategory type="{$varSuperType}"><xsl:value-of select="$varCobjCategory" /></dr:CobjCategory>
<xsl:variable name="varRefereedConvt" select="for $i in (//dc:type, //dc:description, //oai:setSpec) return vocabulary:clean( normalize-space($i), 'dnet:review_levels')" />
<xsl:variable name="varRefereedIdntf" select="(//*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)%\d#])pre[\.\-_/\s\(\)%\d#]?prints?([\.\-_/\s\(\)%\d#].*)?$')][count(//dc:identifier) = 1]/'0002', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)%\d#])refereed([\.\-_/\s\(\)\d%\d#].*)?$')]/'0001', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001')" />
<xsl:variable name="varRefereedSourc" select="//*[string(node-name(.)) = ('dc:source', 'dc:publisher') and matches(lower-case(.), '^(.*\s)?pre[\s\-_]*prints?([\s\.,].*)?$')]/'0002'" />
<xsl:variable name="varRefereedDescr" select="(//dc:description[matches(lower-case(.), '.*(this\s*book|this\s*volume|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001', //dc:description[matches(., '^version\s*(préliminaire.*|preliminary.*|0$)')]/'0002')" />
<xsl:variable name="varRefereedTitle" select="(//dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001', //dc:title[matches(lower-case(.), '.*\(\s*pre[\s\-\._]*prints?\s*\)\s*$')]/'0002')" />
<xsl:variable name="varRefereedSubjt" select="(//dc:subject[matches(lower-case(.), '^\s*refereed\s*$')][//oaf:datasourceprefix = 'narcis______']/'0001', //dc:subject[matches(lower-case(.), '^\s*no[nt].{0,3}refereed\s*$')][//oaf:datasourceprefix = 'narcis______']/'0002')" />
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedIdntf, $varRefereedSourc, $varRefereedDescr, $varRefereedTitle, $varRefereedSubjt)" />
<xsl:choose>
<xsl:when test="count($varRefereed[. = '0001']) &gt; 0">
<oaf:refereed>
<xsl:value-of select="'0001'" />
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereed[. = '0002']) &gt; 0">
<oaf:refereed>
<xsl:value-of select="'0002'" />
</oaf:refereed>
</xsl:when>
</xsl:choose>
<oaf:dateAccepted>
<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
</oaf:dateAccepted>
<xsl:if test="//dc:relation[starts-with(., 'http')] and //dc:rights[.='info:eu-repo/semantics/openAccess']">
<oaf:fulltext>
<xsl:value-of select="//dc:relation[starts-with(., 'http')]" />
</oaf:fulltext>
</xsl:if>
<oaf:hostedBy name="{$varOfficialName}" id="{$varDataSourceId}" />
<oaf:collectedFrom name="{$varOfficialName}" id="{$varDataSourceId}" />
<xsl:variable name="varKnownFileEndings" select="('.bmp', '.doc', '.docx', '.epub', '.flv', '.jpeg', '.jpg', '.m4v', '.mp4', '.mpg', '.odp', '.pdf', '.png', '.ppt', '.tiv', '.txt', '.xls', '.xlsx', '.zip')" />
<xsl:variable name="varIdDoi" select="distinct-values((//dc:identifier[starts-with(., '10.')][matches(., '(10[.][0-9]{4,}[^\s/&gt;]*/[^\s&gt;]+)')], //dc:identifier[starts-with(., 'http') and (contains(., '://dx.doi.org/10.') or contains(., '://doi.org/10.'))]/substring-after(., 'doi.org/'), //dc:identifier[starts-with(lower-case(.), 'doi:10.')]/substring-after(lower-case(.), 'doi:')))" />
<xsl:for-each select="$varIdDoi">
<oaf:identifier identifierType="doi">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<xsl:variable name="varIdHdl" select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))" />
<xsl:for-each select="$varIdHdl" >
<oaf:identifier identifierType="handle">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<xsl:variable name="varIdUrn" select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])" />
<xsl:for-each select="$varIdUrn">
<oaf:identifier identifierType="urn">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<xsl:variable name="varOrigBaseUrl" select="//*[local-name() = 'about']/*[local-name() = 'provenance']//*[local-name() = 'originDescription' and not(./*[local-name() = 'originDescription'])]/*[local-name() = 'baseURL']" />
<xsl:variable name="varIdLdpg" select="distinct-values(//dc:identifier[(contains(substring-after(., '://'), '/') and contains($varOrigBaseUrl, substring-before(substring-after(., '://'), '/'))) or (contains(substring-after(., '://'), ':') and contains($varOrigBaseUrl, substring-before(substring-after(., '://'), ':')))][not(replace(lower-case(.), '.*(\.[a-z]*)$', '$1') = $varKnownFileEndings)])" />
<xsl:for-each select="$varIdLdpg">
<oaf:identifier identifierType="landingPage">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<xsl:variable name="varIdUrl" select="distinct-values(//dc:identifier[starts-with(., 'http')][not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/'))][count(index-of($varIdLdpg, .)) = 0])" />
<xsl:for-each select="$varIdUrl">
<oaf:identifier identifierType="url">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>
<xsl:for-each select="//oai:setSpec">
<xsl:variable name="rorDsId" select="vocabulary:clean(., 'cnr:institutes')" />
<xsl:if test="contains($rorDsId, '/ror.org/')">
<oaf:relation relType="resultOrganization" subRelType="affiliation" relClass="hasAuthorInstitution">
<xsl:value-of select="concat('ror_________::', $rorDsId)" />
</oaf:relation>
</xsl:if>
</xsl:for-each>
</metadata>
<xsl:copy-of select="//*[local-name() = 'about']" />
</record>
</xsl:template>
<xsl:template name="allElements">
<xsl:param name="sourceElement" />
<xsl:param name="targetElement" />
<xsl:for-each select="$sourceElement">
<xsl:element name="{$targetElement}">
<xsl:value-of select="normalize-space(.)" />
</xsl:element>
</xsl:for-each>
</xsl:template>
<xsl:template match="//*[local-name() = 'header']">
<xsl:copy>
<xsl:apply-templates select="node()|@*" />
<xsl:element name="dr:dateOfTransformation">
<xsl:value-of select="$transDate" />
</xsl:element>
</xsl:copy>
</xsl:template>
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*" />
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
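
The block of `varRefereed*` variables above collects independent review-status signals — the vocabulary lookup on dc:type, identifier and source/publisher patterns, description and title phrases — with each signal contributing a '0001' (peerReviewed) or '0002' (nonPeerReviewed) vote, and the `xsl:choose` letting any single '0001' win. The same decision rule in plain Java, as a small sketch (the method name is illustrative, not from the codebase):

```java
import java.util.Arrays;
import java.util.List;

public class RefereedHeuristic {

	/** "0001" = peerReviewed, "0002" = nonPeerReviewed, null = emit no oaf:refereed at all. */
	static String decide(List<String> votes) {
		// Mirrors the xsl:choose above: one peer-reviewed signal outranks
		// any number of preprint signals.
		if (votes.contains("0001")) {
			return "0001";
		}
		if (votes.contains("0002")) {
			return "0002";
		}
		return null;
	}

	public static void main(String[] args) {
		System.out.println(decide(Arrays.asList("0002", "0001"))); // 0001
	}
}
```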

View File

@@ -0,0 +1,57 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns="http://www.openarchives.org/OAI/2.0/">
<header>
<identifier><![CDATA[oai:it.cnr:prodotti:433382]]></identifier>
<datestamp><![CDATA[2020-11-30T15:32:03Z]]></datestamp>
<setSpec><![CDATA[openaire]]></setSpec>
<setSpec><![CDATA[CDS027]]></setSpec>
<setSpec><![CDATA[CDS080]]></setSpec>
</header>
<metadata>
<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:type><![CDATA[info:eu-repo/semantics/conferenceObject]]></dc:type>
<dc:type><![CDATA[Presentazione]]></dc:type>
<dc:title><![CDATA[A multiscale observing approach for understanding acidification process in a marginal sea (northern Adriatic)]]></dc:title>
<dc:creator><![CDATA[Cantoni C.]]></dc:creator>
<dc:creator><![CDATA[Barba L.]]></dc:creator>
<dc:creator><![CDATA[Bastianini M.]]></dc:creator>
<dc:creator><![CDATA[Bortoluzzi G.]]></dc:creator>
<dc:creator><![CDATA[Celio M.]]></dc:creator>
<dc:creator><![CDATA[Chiggiato J.]]></dc:creator>
<dc:creator><![CDATA[Cozzi S.]]></dc:creator>
<dc:creator><![CDATA[Luchetta A.]]></dc:creator>
<dc:creator><![CDATA[Ravaioli M.]]></dc:creator>
<dc:creator><![CDATA[Sparnocchia S.]]></dc:creator>
<dc:language><![CDATA[eng]]></dc:language>
<dc:description><![CDATA[The Northern Adriatic is a shallow, semi-enclosed industrialized sub-basin of the Mediterranean affected by significant ecosystem
changes, which are studied through several research activities including the long-term monitoring of ILTER international network.
Changes of pHT (-0.06) and TA (+74 µmol/kg) in dense winter waters over the last 25 years already showed that this area is prone to
acidification process under a complex inorganic carbon chemistry variability. To understand these changes, monthly sampling of the
main biogeochemical and biological parameters has been carrying out since 2008 by a fixed station (PALOMA, Gulf of Trieste). In 2013
the site has been implemented with continuous pCO2 measurements at 3 m depth and has been regularly visiting during basin wide
surveys for the last two years. The combination of automated in situ measurements, monthly samplings and basin scale
oceanographic cruises is used to better understand the processes controlling air-sea CO2 fluxes and inorganic carbon chemistry under
three different scenarios: an extreme event of dense water formation, the phytoplankton blooms associated with riverine inputs and
the late-summer marked oxygen under saturation in the deeper waters.]]></dc:description>
<dc:source><![CDATA[ASLO - Aquatic Sciences Meeting, Granada, Spagna, 22-27/02/2015]]></dc:source>
<dc:source><![CDATA[info:cnr-pdr/source/autori:Cantoni C., Barba L., Bastianini M., Bortoluzzi G., Celio M., Chiggiato J., Cozzi S., Luchetta A., Ravaioli M., Sparnocchia S./congresso_nome:ASLO - Aquatic Sciences Meeting/congresso_luogo:Granada, Spagna/congresso_data:22-27%2F02%2F2015/anno:2015/pagina_da:/pagina_a:/intervallo_pagine:]]></dc:source>
<dc:date><![CDATA[2015]]></dc:date>
<dc:identifier><![CDATA[http://www.cnr.it/prodotto/i/433382]]></dc:identifier>
<dc:identifier><![CDATA[https://publications.cnr.it/doc/433382]]></dc:identifier>
<dc:identifier><![CDATA[http://sgmeet.com/aslo/granada2015/]]></dc:identifier>
<dc:relation><![CDATA[info:eu-repo/grantAgreement/EC/FP7/211574//Integrated Carbon Observation System/ICOS]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:502/SPARNOCCHIA/STEFANIA]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:990/BASTIANINI/MAURO]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:5185/BARBA/LUISA]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:5453/COZZI/STEFANO]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:12491/LUCHETTA/ANNA]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:12837/CANTONI/CAROLINA]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:18161/RAVAIOLI/MARIANGELA]]></dc:relation>
<dc:rights><![CDATA[info:eu-repo/semantics/openAccess]]></dc:rights>
<dc:subject><![CDATA[ocean acidification]]></dc:subject>
<dc:subject><![CDATA[Northern Adriatic Sea]]></dc:subject>
<dc:subject><![CDATA[inorganic carbon system]]></dc:subject>
<dc:subject><![CDATA[PALOMA]]></dc:subject>
</oai_dc:dc>
</metadata>
</record>

View File

@@ -1233,3 +1233,267 @@ dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-紀要論文(査読有
dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-雑誌記事(査読有り)
dnet:review_levels @=@ 0001 @=@ 原著論文(査読有り)
dnet:review_levels @=@ 0001 @=@ 査読論文
cnr:institutes @=@ https://ror.org/00brf2d87 @=@ CDS001
cnr:institutes @=@ https://ror.org/00brf2d87 @=@ IDASC - Istituto di Acustica e Sensoristica \"Orso Mario Corbino\"
cnr:institutes @=@ https://ror.org/006qkqr45 @=@ CDS002
cnr:institutes @=@ https://ror.org/006qkqr45 @=@ IAMC - Istituto per l'ambiente marino costiero
cnr:institutes @=@ https://ror.org/054ye0e45 @=@ CDS003
cnr:institutes @=@ https://ror.org/054ye0e45 @=@ IASI - Istituto di analisi dei sistemi ed informatica \"Antonio Ruberti\"
cnr:institutes @=@ https://ror.org/00ygy3d85 @=@ CDS004
cnr:institutes @=@ https://ror.org/00ygy3d85 @=@ IAC - Istituto per le applicazioni del calcolo \"Mauro Picone\"
cnr:institutes @=@ https://ror.org/000sy1f36 @=@ CDS005
cnr:institutes @=@ https://ror.org/000sy1f36 @=@ IASF - Istituto di astrofisica spaziale e fisica cosmica
cnr:institutes @=@ https://ror.org/00x5wpm25 @=@ CDS006
cnr:institutes @=@ https://ror.org/00x5wpm25 @=@ IBAM - Istituto per i beni archeologici e monumentali
cnr:institutes @=@ https://ror.org/03eqeqg74 @=@ CDS007
cnr:institutes @=@ https://ror.org/03eqeqg74 @=@ IBP - Istituto di biochimica delle proteine
cnr:institutes @=@ https://ror.org/041xzk838 @=@ CDS008
cnr:institutes @=@ https://ror.org/041xzk838 @=@ IBF - Istituto di biofisica
cnr:institutes @=@ https://ror.org/00s2j5046 @=@ CDS009
cnr:institutes @=@ https://ror.org/00s2j5046 @=@ IBFM - Istituto di bioimmagini e fisiologia molecolare
cnr:institutes @=@ __CDS010__ @=@ CDS010
cnr:institutes @=@ __CDS010__ @=@ IBAF - Istituto di biologia agro-ambientale e forestale
cnr:institutes @=@ __CDS011__ @=@ CDS011
cnr:institutes @=@ __CDS011__ @=@ IBC - Istituto di biologia cellulare
cnr:institutes @=@ https://ror.org/02e5sbe24 @=@ CDS012
cnr:institutes @=@ https://ror.org/02e5sbe24 @=@ IBBA - Istituto di biologia e biotecnologia agraria
cnr:institutes @=@ https://ror.org/01nyatq71 @=@ CDS013
cnr:institutes @=@ https://ror.org/01nyatq71 @=@ IBPM - Istituto di biologia e patologia molecolari
cnr:institutes @=@ https://ror.org/01dy2q607 @=@ CDS014
cnr:institutes @=@ https://ror.org/01dy2q607 @=@ IBIM - Istituto di biomedicina e di immunologia molecolare \"Alberto Monroy\"
cnr:institutes @=@ https://ror.org/05nzf7q96 @=@ CDS015
cnr:institutes @=@ https://ror.org/05nzf7q96 @=@ IBIOM - Istituto di Biomembrane, Bioenergetica e Biotecnologie Molecolari
cnr:institutes @=@ https://ror.org/05m1yqp60 @=@ CDS016
cnr:institutes @=@ https://ror.org/05m1yqp60 @=@ IBIMET - Istituto di biometeorologia
cnr:institutes @=@ https://ror.org/03rqtqb02 @=@ CDS017
cnr:institutes @=@ https://ror.org/03rqtqb02 @=@ IBB - Istituto di biostrutture e bioimmagini
cnr:institutes @=@ https://ror.org/04r5fge26 @=@ CDS018
cnr:institutes @=@ https://ror.org/04r5fge26 @=@ ICAR - Istituto di calcolo e reti ad alte prestazioni
cnr:institutes @=@ https://ror.org/03wyf0g15 @=@ CDS019
cnr:institutes @=@ https://ror.org/03wyf0g15 @=@ ICB - Istituto di chimica biomolecolare
cnr:institutes @=@ https://ror.org/02fkw1114 @=@ CDS020
cnr:institutes @=@ https://ror.org/02fkw1114 @=@ ICCOM - Istituto di chimica dei composti organo metallici
cnr:institutes @=@ https://ror.org/0141vn777 @=@ CDS021
cnr:institutes @=@ https://ror.org/0141vn777 @=@ ICRM - Istituto di chimica del riconoscimento molecolare
cnr:institutes @=@ __CDS022__ @=@ CDS022
cnr:institutes @=@ __CDS022__ @=@ ICTP - Istituto di chimica e tecnologia dei polimeri
cnr:institutes @=@ __CDS023__ @=@ CDS023
cnr:institutes @=@ __CDS023__ @=@ ICIS - Istituto di chimica inorganica e delle superfici
cnr:institutes @=@ https://ror.org/00be3zh53 @=@ CDS024
cnr:institutes @=@ https://ror.org/00be3zh53 @=@ ISASI - Istituto di Scienze Applicate e Sistemi Intelligenti \"Eduardo Caianiello\"
cnr:institutes @=@ __CDS025__ @=@ CDS025
cnr:institutes @=@ __CDS025__ @=@ ICEVO - Istituto di Studi sulle Civiltà dell'Egeo e del Vicino Oriente
cnr:institutes @=@ https://ror.org/02ynrme92 @=@ CDS026
cnr:institutes @=@ https://ror.org/02ynrme92 @=@ ICVBC - Istituto per la conservazione e valorizzazione dei beni culturali
cnr:institutes @=@ https://ror.org/05wba8r86 @=@ CDS027
cnr:institutes @=@ https://ror.org/05wba8r86 @=@ IC - Istituto di cristallografia
cnr:institutes @=@ https://ror.org/03z58xd74 @=@ CDS028
cnr:institutes @=@ https://ror.org/03z58xd74 @=@ IDPA - Istituto per la dinamica dei processi ambientali
cnr:institutes @=@ https://ror.org/00n4jbh84 @=@ CDS029
cnr:institutes @=@ https://ror.org/00n4jbh84 @=@ IEIIT - Istituto di elettronica e di ingegneria dell'informazione e delle telecomunicazioni
cnr:institutes @=@ https://ror.org/04sn06036 @=@ CDS030
cnr:institutes @=@ https://ror.org/04sn06036 @=@ IEOS - Istituto per l'endocrinologia e l'oncologia \"Gaetano Salvatore\"
cnr:institutes @=@ https://ror.org/01rg40y89 @=@ CDS031
cnr:institutes @=@ https://ror.org/01rg40y89 @=@ ICMATE - Istituto di Chimica della Materia Condensata e di Tecnologie per l'Energia
cnr:institutes @=@ https://ror.org/00dqega85 @=@ CDS032
cnr:institutes @=@ https://ror.org/00dqega85 @=@ IFAC - Istituto di fisica applicata \"Nello Carrara\"
cnr:institutes @=@ https://ror.org/02n2bgz18 @=@ CDS033
cnr:institutes @=@ https://ror.org/02n2bgz18 @=@ IFP - Istituto di fisica del plasma \"Piero Caldirola\"
cnr:institutes @=@ __CDS034__ @=@ CDS034
cnr:institutes @=@ __CDS034__ @=@ IFSI - Istituto di fisica dello spazio interplanetario
cnr:institutes @=@ https://ror.org/01kdj2848 @=@ CDS035
cnr:institutes @=@ https://ror.org/01kdj2848 @=@ IFC - Istituto di fisiologia clinica
cnr:institutes @=@ https://ror.org/049ebw417 @=@ CDS036
cnr:institutes @=@ https://ror.org/049ebw417 @=@ IFN - Istituto di fotonica e nanotecnologie
cnr:institutes @=@ https://ror.org/01f5tnx94 @=@ CDS037
cnr:institutes @=@ https://ror.org/01f5tnx94 @=@ IGI - Istituto gas ionizzati
cnr:institutes @=@ __CDS038__ @=@ CDS038
cnr:institutes @=@ __CDS038__ @=@ IGP - Istituto di genetica delle popolazioni
cnr:institutes @=@ https://ror.org/04hadk112 @=@ CDS039
cnr:institutes @=@ https://ror.org/04hadk112 @=@ IGB - Istituto di genetica e biofisica \"Adriano Buzzati Traverso\"
cnr:institutes @=@ https://ror.org/03qpd8w66 @=@ CDS040
cnr:institutes @=@ https://ror.org/03qpd8w66 @=@ IGM - Istituto di genetica molecolare \"Luigi Luca Cavalli Sforza\"
cnr:institutes @=@ https://ror.org/01gtsa866 @=@ CDS041
cnr:institutes @=@ https://ror.org/01gtsa866 @=@ IBBR - Istituto di Bioscienze e Biorisorse
cnr:institutes @=@ https://ror.org/00ytw6m58 @=@ CDS042
cnr:institutes @=@ https://ror.org/00ytw6m58 @=@ IGAG - Istituto di geologia ambientale e geoingegneria
cnr:institutes @=@ https://ror.org/015bmra78 @=@ CDS043
cnr:institutes @=@ https://ror.org/015bmra78 @=@ IGG - Istituto di geoscienze e georisorse
cnr:institutes @=@ https://ror.org/02gdcn153 @=@ CDS044
cnr:institutes @=@ https://ror.org/02gdcn153 @=@ IIT - Istituto di informatica e telematica
cnr:institutes @=@ __CDS045__ @=@ CDS045
cnr:institutes @=@ __CDS045__ @=@ ISIB - Istituto di ingegneria biomedica
cnr:institutes @=@ https://ror.org/05hky6p02 @=@ CDS046
cnr:institutes @=@ https://ror.org/05hky6p02 @=@ IIA - Istituto sull'inquinamento atmosferico
cnr:institutes @=@ https://ror.org/011n2hw53 @=@ CDS047
cnr:institutes @=@ https://ror.org/011n2hw53 @=@ ILIESI - Istituto per il lessico intellettuale europeo e storia delle idee
cnr:institutes @=@ https://ror.org/028g3pe33 @=@ CDS048
cnr:institutes @=@ https://ror.org/028g3pe33 @=@ ILC - Istituto di linguistica computazionale \"Antonio Zampolli\"
cnr:institutes @=@ __CDS049__ @=@ CDS049
cnr:institutes @=@ __CDS049__ @=@ IMAMOTER - Istituto per le macchine agricole e movimento terra
cnr:institutes @=@ https://ror.org/03m0n3c07 @=@ CDS050
cnr:institutes @=@ https://ror.org/03m0n3c07 @=@ IMATI - Istituto di matematica applicata e tecnologie informatiche \"Enrico Magenes\"
cnr:institutes @=@ __CDS051__ @=@ CDS051
cnr:institutes @=@ __CDS051__ @=@ IMCB - Istituto per i materiali compositi e biomedici
cnr:institutes @=@ https://ror.org/00z8ws214 @=@ CDS052
cnr:institutes @=@ https://ror.org/00z8ws214 @=@ IMEM - Istituto dei materiali per l'elettronica ed il magnetismo
cnr:institutes @=@ __CDS053__ @=@ CDS053
cnr:institutes @=@ __CDS053__ @=@ ISB - Istituto per i Sistemi Biologici
cnr:institutes @=@ https://ror.org/00bc51d88 @=@ CDS054
cnr:institutes @=@ https://ror.org/00bc51d88 @=@ NANOTEC - Istituto di Nanotecnologia
cnr:institutes @=@ https://ror.org/024ye7w89 @=@ CDS055
cnr:institutes @=@ https://ror.org/024ye7w89 @=@ IMAA - Istituto di metodologie per l'analisi ambientale
cnr:institutes @=@ __CDS056__ @=@ CDS056
cnr:institutes @=@ __CDS056__ @=@ IMGC - Istituto di metrologia \"Gustavo Colonnetti\"
cnr:institutes @=@ https://ror.org/05vk2g845 @=@ CDS057
cnr:institutes @=@ https://ror.org/05vk2g845 @=@ IMM - Istituto per la microelettronica e microsistemi
cnr:institutes @=@ https://ror.org/02qwy8e97 @=@ CDS058
cnr:institutes @=@ https://ror.org/02qwy8e97 @=@ IM - Istituto motori
cnr:institutes @=@ __CDS059__ @=@ CDS059
cnr:institutes @=@ __CDS059__ @=@ INMM - Istituto di neurobiologia e medicina molecolare
cnr:institutes @=@ https://ror.org/02dr63s31 @=@ CDS060
cnr:institutes @=@ https://ror.org/02dr63s31 @=@ IRGB - Istituto di Ricerca Genetica e Biomedica
cnr:institutes @=@ https://ror.org/0240rwx68 @=@ CDS061
cnr:institutes @=@ https://ror.org/0240rwx68 @=@ IN - Istituto di neuroscienze
cnr:institutes @=@ https://ror.org/02rzxrg25 @=@ CDS062
cnr:institutes @=@ https://ror.org/02rzxrg25 @=@ OVI - Istituto opera del vocabolario italiano
cnr:institutes @=@ https://ror.org/05patmk97 @=@ CDS063
cnr:institutes @=@ https://ror.org/05patmk97 @=@ IPCF - Istituto per i processi chimico-fisici
cnr:institutes @=@ __CDS064__ @=@ CDS064
cnr:institutes @=@ __CDS064__ @=@ IPP - Istituto per la protezione delle piante
cnr:institutes @=@ https://ror.org/029st3z03 @=@ CDS065
cnr:institutes @=@ https://ror.org/029st3z03 @=@ IRA - Istituto di radioastronomia
cnr:institutes @=@ https://ror.org/0040zx077 @=@ CDS066
cnr:institutes @=@ https://ror.org/0040zx077 @=@ IRPI - Istituto di ricerca per la protezione idrogeologica
cnr:institutes @=@ https://ror.org/044bfsy89 @=@ CDS067
cnr:institutes @=@ https://ror.org/044bfsy89 @=@ IRCRES - Istituto di Ricerca sulla Crescita Economica Sostenibile
cnr:institutes @=@ https://ror.org/01n1ayq61 @=@ CDS068
cnr:institutes @=@ https://ror.org/01n1ayq61 @=@ IRPPS - Istituto di ricerche sulla popolazione e le politiche sociali
cnr:institutes @=@ https://ror.org/02db0kh50 @=@ CDS069
cnr:institutes @=@ https://ror.org/02db0kh50 @=@ IRSA - Istituto di ricerca sulle acque
cnr:institutes @=@ https://ror.org/05813wx75 @=@ CDS070
cnr:institutes @=@ https://ror.org/05813wx75 @=@ IRC - Istituto di ricerche sulla combustione
cnr:institutes @=@ https://ror.org/04vnwke91 @=@ CDS071
cnr:institutes @=@ https://ror.org/04vnwke91 @=@ IRISS - Istituto di Ricerca su Innovazione e Servizi per lo Sviluppo
cnr:institutes @=@ https://ror.org/02wxw4x45 @=@ CDS072
cnr:institutes @=@ https://ror.org/02wxw4x45 @=@ IREA - Istituto per il rilevamento elettromagnetico dell'ambiente
cnr:institutes @=@ https://ror.org/01j6drw72 @=@ CDS073
cnr:institutes @=@ https://ror.org/01j6drw72 @=@ ISTEC - Istituto di scienza e tecnologia dei materiali ceramici
cnr:institutes @=@ https://ror.org/05kacka20 @=@ CDS074
cnr:institutes @=@ https://ror.org/05kacka20 @=@ ISTI - Istituto di scienza e tecnologie dell'informazione \"Alessandro Faedo\"
cnr:institutes @=@ https://ror.org/00n8ttd98 @=@ CDS075
cnr:institutes @=@ https://ror.org/00n8ttd98 @=@ ISAC - Istituto di scienze dell'atmosfera e del clima
cnr:institutes @=@ https://ror.org/0013zhk30 @=@ CDS076
cnr:institutes @=@ https://ror.org/0013zhk30 @=@ ISA - Istituto di Scienze dell'Alimentazione
cnr:institutes @=@ https://ror.org/03x7xkr71 @=@ CDS077
cnr:institutes @=@ https://ror.org/03x7xkr71 @=@ ISPA - Istituto di scienze delle produzioni alimentari
cnr:institutes @=@ https://ror.org/05w9g2j85 @=@ CDS078
cnr:institutes @=@ https://ror.org/05w9g2j85 @=@ ISTC - Istituto di scienze e tecnologie della cognizione
cnr:institutes @=@ https://ror.org/032tyv240 @=@ CDS079
cnr:institutes @=@ https://ror.org/032tyv240 @=@ ISTM - Istituto di scienze e tecnologie molecolari
cnr:institutes @=@ https://ror.org/02hdf6119 @=@ CDS080
cnr:institutes @=@ https://ror.org/02hdf6119 @=@ ISMAR - Istituto di scienze marine
cnr:institutes @=@ https://ror.org/01yg57d71 @=@ CDS081
cnr:institutes @=@ https://ror.org/01yg57d71 @=@ ISN - Istituto di scienze neurologiche
cnr:institutes @=@ https://ror.org/021z1mz76 @=@ CDS082
cnr:institutes @=@ https://ror.org/021z1mz76 @=@ ISOF - Istituto per la sintesi organica e la fotoreattività
cnr:institutes @=@ https://ror.org/01wqae691 @=@ CDS083
cnr:institutes @=@ https://ror.org/01wqae691 @=@ ISPAAM - Istituto per il sistema produzione animale in ambiente Mediterraneo
cnr:institutes @=@ __CDS084__ @=@ CDS084
cnr:institutes @=@ __CDS084__ @=@ ISAFoM - Istituto per i sistemi agricoli e forestali del mediterraneo
cnr:institutes @=@ https://ror.org/00awwz417 @=@ CDS085
cnr:institutes @=@ https://ror.org/00awwz417 @=@ ISPF - Istituto per la storia del pensiero filosofico e scientifico moderno
cnr:institutes @=@ https://ror.org/03a111314 @=@ CDS086
cnr:institutes @=@ https://ror.org/03a111314 @=@ ISEM - Istituto di storia dell'Europa mediterranea
cnr:institutes @=@ https://ror.org/01zz9wh30 @=@ CDS087
cnr:institutes @=@ https://ror.org/01zz9wh30 @=@ ISM - Istituto di struttura della materia
cnr:institutes @=@ https://ror.org/035y5td47 @=@ CDS088
cnr:institutes @=@ https://ror.org/035y5td47 @=@ ISGI - Istituto di studi giuridici internazionali
cnr:institutes @=@ __CDS089__ @=@ CDS089
cnr:institutes @=@ __CDS089__ @=@ ISPRI - Istituto sperimentale di studi socio - economici sull'innovazione e le politiche della ricerca
cnr:institutes @=@ https://ror.org/051t1q308 @=@ CDS090
cnr:institutes @=@ https://ror.org/051t1q308 @=@ ISSIA - Istituto di studi sui sistemi intelligenti per l'automazione
cnr:institutes @=@ https://ror.org/05k3cs357 @=@ CDS091
cnr:institutes @=@ https://ror.org/05k3cs357 @=@ ISSIRFA - Istituto di studi sui sistemi regionali federali e sulle autonomie \"Massimo Severo Giannini\"
cnr:institutes @=@ https://ror.org/02gcxw165 @=@ CDS092
cnr:institutes @=@ https://ror.org/02gcxw165 @=@ ISMA - Istituto di Studi sul Mediterraneo Antico
cnr:institutes @=@ https://ror.org/05db0es39 @=@ CDS093
cnr:institutes @=@ https://ror.org/05db0es39 @=@ ISMed - Istituto di studi sul Mediterraneo
cnr:institutes @=@ https://ror.org/029k6t707 @=@ CDS094
cnr:institutes @=@ https://ror.org/029k6t707 @=@ ISE - Istituto per lo studio degli ecosistemi
cnr:institutes @=@ https://ror.org/00w6r1881 @=@ CDS095
cnr:institutes @=@ https://ror.org/00w6r1881 @=@ ISMN - Istituto per lo studio dei materiali nanostrutturati
cnr:institutes @=@ https://ror.org/01mfmr054 @=@ CDS096
cnr:institutes @=@ https://ror.org/01mfmr054 @=@ ISMAC - Istituto per lo studio delle macromolecole
cnr:institutes @=@ https://ror.org/058nrs650 @=@ CDS097
cnr:institutes @=@ https://ror.org/058nrs650 @=@ ITM - Istituto per la tecnologia delle membrane
cnr:institutes @=@ https://ror.org/0331xj092 @=@ CDS098
cnr:institutes @=@ https://ror.org/0331xj092 @=@ ITABC - Istituto per le tecnologie applicate ai beni culturali
cnr:institutes @=@ https://ror.org/052q58629 @=@ CDS099
cnr:institutes @=@ https://ror.org/052q58629 @=@ ITAE - Istituto di tecnologie avanzate per l'energia \"Nicola Giordano\"
cnr:institutes @=@ https://ror.org/04ehykb85 @=@ CDS100
cnr:institutes @=@ https://ror.org/04ehykb85 @=@ ITB - Istituto di tecnologie biomediche
cnr:institutes @=@ https://ror.org/0221agg28 @=@ CDS101
cnr:institutes @=@ https://ror.org/0221agg28 @=@ ITC - Istituto per le tecnologie della costruzione
cnr:institutes @=@ https://ror.org/02xz4xc25 @=@ CDS102
cnr:institutes @=@ https://ror.org/02xz4xc25 @=@ ITD - Istituto per le tecnologie didattiche
cnr:institutes @=@ __CDS103__ @=@ CDS103
cnr:institutes @=@ __CDS103__ @=@ STIIMA - Istituto di Sistemi e Tecnologie Industriali Intelligenti per il Manifatturiero Avanzato
cnr:institutes @=@ https://ror.org/01as2bh37 @=@ CDS104
cnr:institutes @=@ https://ror.org/01as2bh37 @=@ ITTIG - Istituto di teoria e tecniche dell'informazione giuridica
cnr:institutes @=@ https://ror.org/01y5w6t76 @=@ CDS105
cnr:institutes @=@ https://ror.org/01y5w6t76 @=@ ITOI - Istituto per i trapianti d'organo e immunocitologia
cnr:institutes @=@ https://ror.org/04xy2mq71 @=@ CDS106
cnr:institutes @=@ https://ror.org/04xy2mq71 @=@ IVALSA - Istituto per la valorizzazione del legno e delle specie arboree
cnr:institutes @=@ __CDS107__ @=@ CDS107
cnr:institutes @=@ __CDS107__ @=@ IVV - Istituto di virologia vegetale
cnr:institutes @=@ https://ror.org/013nxtf56 @=@ CDS108
cnr:institutes @=@ https://ror.org/013nxtf56 @=@ IRSIG - Istituto di ricerca sui sistemi giudiziari
cnr:institutes @=@ https://ror.org/05rcgef49 @=@ CDS109
cnr:institutes @=@ https://ror.org/05rcgef49 @=@ ISC - Istituto dei sistemi complessi
cnr:institutes @=@ __CDS110__ @=@ CDS110
cnr:institutes @=@ __CDS110__ @=@ INFM - Centro di responsabilità scientifica INFM
cnr:institutes @=@ https://ror.org/02dp3a879 @=@ CDS111
cnr:institutes @=@ https://ror.org/02dp3a879 @=@ INO - Istituto nazionale di ottica
cnr:institutes @=@ __CDS112__ @=@ CDS112
cnr:institutes @=@ __CDS112__ @=@ IDAIC - Centro di responsabilità di attività scientifica IDAIC
cnr:institutes @=@ https://ror.org/00p03yg71 @=@ CDS113
cnr:institutes @=@ https://ror.org/00p03yg71 @=@ SPIN - Istituto superconduttori, materiali innovativi e dispositivi
cnr:institutes @=@ https://ror.org/00yfw2296 @=@ CDS114
cnr:institutes @=@ https://ror.org/00yfw2296 @=@ IOM - Istituto officina dei materiali
cnr:institutes @=@ https://ror.org/0042e5975 @=@ CDS115
cnr:institutes @=@ https://ror.org/0042e5975 @=@ NANO - Istituto Nanoscienze
cnr:institutes @=@ https://ror.org/03ta8pf33 @=@ CDS116
cnr:institutes @=@ https://ror.org/03ta8pf33 @=@ IFT - Istituto di Farmacologia Traslazionale
cnr:institutes @=@ https://ror.org/040xhth73 @=@ CDS117
cnr:institutes @=@ https://ror.org/040xhth73 @=@ IBCN - Istituto di Biologia Cellulare e Neurobiologia
cnr:institutes @=@ https://ror.org/02qnx8e75 @=@ CDS118
cnr:institutes @=@ https://ror.org/02qnx8e75 @=@ INM - Istituto di iNgegneria del Mare
cnr:institutes @=@ https://ror.org/05nr7xa08 @=@ CDS119
cnr:institutes @=@ https://ror.org/05nr7xa08 @=@ IPCB - Istituto per i Polimeri, Compositi e Biomateriali
cnr:institutes @=@ https://ror.org/008fjbg42 @=@ CDS121
cnr:institutes @=@ https://ror.org/008fjbg42 @=@ IPSP - Istituto per la Protezione Sostenibile delle Piante
cnr:institutes @=@ __CDS122__ @=@ CDS122
cnr:institutes @=@ __CDS122__ @=@ IRBIM - Istituto per le Risorse Biologiche e le Biotecnologie Marine
cnr:institutes @=@ __CDS123__ @=@ CDS123
cnr:institutes @=@ __CDS123__ @=@ ISPC - Istituto di Scienze del Patrimonio Culturale
cnr:institutes @=@ __CDS124__ @=@ CDS124
cnr:institutes @=@ __CDS124__ @=@ IAS - Istituto per lo studio degli impatti Antropici e Sostenibilità in ambiente marino
cnr:institutes @=@ __CDS125__ @=@ CDS125
cnr:institutes @=@ __CDS125__ @=@ IRET - Istituto di Ricerca sugli Ecosistemi Terrestri
cnr:institutes @=@ https://ror.org/03a0vt050 @=@ CDS126
cnr:institutes @=@ https://ror.org/03a0vt050 @=@ ISTP - Istituto per la Scienza e Tecnologia dei Plasmi
cnr:institutes @=@ __CDS127__ @=@ CDS127
cnr:institutes @=@ __CDS127__ @=@ ISP - Istituto di Scienze Polari
cnr:institutes @=@ https://ror.org/03byxpq91 @=@ CDS128
cnr:institutes @=@ https://ror.org/03byxpq91 @=@ IRIB - Istituto per la Ricerca e l'Innovazione Biomedica
cnr:institutes @=@ __CDS129__ @=@ CDS129
cnr:institutes @=@ __CDS129__ @=@ IGSG - Istituto di Informatica Giuridica e Sistemi Giudiziari
cnr:institutes @=@ __CDS130__ @=@ CDS130
cnr:institutes @=@ __CDS130__ @=@ IBBC - Istituto di Biochimica e Biologia Cellulare
cnr:institutes @=@ __CDS131__ @=@ CDS131
cnr:institutes @=@ __CDS131__ @=@ IBE - Istituto per la BioEconomia
cnr:institutes @=@ https://ror.org/0263zy895 @=@ CDS132
cnr:institutes @=@ https://ror.org/0263zy895 @=@ SCITEC - Istituto di Scienze e Tecnologie Chimiche \"Giulio Natta\"
cnr:institutes @=@ __CDS133__ @=@ CDS133
cnr:institutes @=@ __CDS133__ @=@ STEMS - Istituto di Scienze e Tecnologie per l'Energia e la Mobilità Sostenibili
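
The `cnr:institutes` vocabulary above maps both the `CDSxxx` set codes and the institute display names to ROR identifiers; institutes without a ROR id get a `__CDSxxx__` placeholder instead, which is why the stylesheet guards the affiliation relation with `contains($rorDsId, '/ror.org/')`. A minimal sketch of that lookup-and-filter step, with two entries copied from the list above and the `CNR_INSTITUTES` map standing in for the `vocabulary:clean` extension function:

```java
import java.util.Map;
import java.util.Optional;

public class InstituteAffiliation {

	// Two sample entries from the cnr:institutes vocabulary above.
	static final Map<String, String> CNR_INSTITUTES = Map.of(
		"CDS027", "https://ror.org/05wba8r86", // IC - Istituto di cristallografia
		"CDS010", "__CDS010__"); // IBAF - no ROR id assigned

	/** Returns the organization id for the relation, or empty for placeholders. */
	static Optional<String> affiliationId(String setSpec) {
		final String rorDsId = CNR_INSTITUTES.getOrDefault(setSpec, "");
		if (!rorDsId.contains("/ror.org/")) {
			return Optional.empty(); // placeholders like __CDS010__ produce no relation
		}
		return Optional.of("ror_________::" + rorDsId); // as in the xsl:for-each over oai:setSpec
	}
}
```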

View File

@@ -1078,3 +1078,136 @@ dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/AUTHOR/ORCID @=@ An Ope
dnet:review_levels @=@ dnet:review_levels @=@ 0000 @=@ Unknown
dnet:review_levels @=@ dnet:review_levels @=@ 0002 @=@ nonPeerReviewed
dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00brf2d87 @=@ https://ror.org/00brf2d87
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/006qkqr45 @=@ https://ror.org/006qkqr45
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/054ye0e45 @=@ https://ror.org/054ye0e45
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00ygy3d85 @=@ https://ror.org/00ygy3d85
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/000sy1f36 @=@ https://ror.org/000sy1f36
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00x5wpm25 @=@ https://ror.org/00x5wpm25
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03eqeqg74 @=@ https://ror.org/03eqeqg74
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/041xzk838 @=@ https://ror.org/041xzk838
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00s2j5046 @=@ https://ror.org/00s2j5046
cnr:institutes @=@ cnr:institutes @=@ __CDS010__ @=@ __CDS010__
cnr:institutes @=@ cnr:institutes @=@ __CDS011__ @=@ __CDS011__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02e5sbe24 @=@ https://ror.org/02e5sbe24
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01nyatq71 @=@ https://ror.org/01nyatq71
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01dy2q607 @=@ https://ror.org/01dy2q607
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05nzf7q96 @=@ https://ror.org/05nzf7q96
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05m1yqp60 @=@ https://ror.org/05m1yqp60
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03rqtqb02 @=@ https://ror.org/03rqtqb02
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04r5fge26 @=@ https://ror.org/04r5fge26
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03wyf0g15 @=@ https://ror.org/03wyf0g15
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02fkw1114 @=@ https://ror.org/02fkw1114
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0141vn777 @=@ https://ror.org/0141vn777
cnr:institutes @=@ cnr:institutes @=@ __CDS022__ @=@ __CDS022__
cnr:institutes @=@ cnr:institutes @=@ __CDS023__ @=@ __CDS023__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00be3zh53 @=@ https://ror.org/00be3zh53
cnr:institutes @=@ cnr:institutes @=@ __CDS025__ @=@ __CDS025__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02ynrme92 @=@ https://ror.org/02ynrme92
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05wba8r86 @=@ https://ror.org/05wba8r86
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03z58xd74 @=@ https://ror.org/03z58xd74
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00n4jbh84 @=@ https://ror.org/00n4jbh84
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04sn06036 @=@ https://ror.org/04sn06036
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01rg40y89 @=@ https://ror.org/01rg40y89
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00dqega85 @=@ https://ror.org/00dqega85
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02n2bgz18 @=@ https://ror.org/02n2bgz18
cnr:institutes @=@ cnr:institutes @=@ __CDS034__ @=@ __CDS034__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01kdj2848 @=@ https://ror.org/01kdj2848
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/049ebw417 @=@ https://ror.org/049ebw417
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01f5tnx94 @=@ https://ror.org/01f5tnx94
cnr:institutes @=@ cnr:institutes @=@ __CDS038__ @=@ __CDS038__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04hadk112 @=@ https://ror.org/04hadk112
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03qpd8w66 @=@ https://ror.org/03qpd8w66
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01gtsa866 @=@ https://ror.org/01gtsa866
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00ytw6m58 @=@ https://ror.org/00ytw6m58
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/015bmra78 @=@ https://ror.org/015bmra78
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02gdcn153 @=@ https://ror.org/02gdcn153
cnr:institutes @=@ cnr:institutes @=@ __CDS045__ @=@ __CDS045__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05hky6p02 @=@ https://ror.org/05hky6p02
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/011n2hw53 @=@ https://ror.org/011n2hw53
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/028g3pe33 @=@ https://ror.org/028g3pe33
cnr:institutes @=@ cnr:institutes @=@ __CDS049__ @=@ __CDS049__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03m0n3c07 @=@ https://ror.org/03m0n3c07
cnr:institutes @=@ cnr:institutes @=@ __CDS051__ @=@ __CDS051__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00z8ws214 @=@ https://ror.org/00z8ws214
cnr:institutes @=@ cnr:institutes @=@ __CDS053__ @=@ __CDS053__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00bc51d88 @=@ https://ror.org/00bc51d88
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/024ye7w89 @=@ https://ror.org/024ye7w89
cnr:institutes @=@ cnr:institutes @=@ __CDS056__ @=@ __CDS056__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05vk2g845 @=@ https://ror.org/05vk2g845
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02qwy8e97 @=@ https://ror.org/02qwy8e97
cnr:institutes @=@ cnr:institutes @=@ __CDS059__ @=@ __CDS059__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02dr63s31 @=@ https://ror.org/02dr63s31
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0240rwx68 @=@ https://ror.org/0240rwx68
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02rzxrg25 @=@ https://ror.org/02rzxrg25
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05patmk97 @=@ https://ror.org/05patmk97
cnr:institutes @=@ cnr:institutes @=@ __CDS064__ @=@ __CDS064__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/029st3z03 @=@ https://ror.org/029st3z03
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0040zx077 @=@ https://ror.org/0040zx077
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/044bfsy89 @=@ https://ror.org/044bfsy89
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01n1ayq61 @=@ https://ror.org/01n1ayq61
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02db0kh50 @=@ https://ror.org/02db0kh50
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05813wx75 @=@ https://ror.org/05813wx75
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04vnwke91 @=@ https://ror.org/04vnwke91
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02wxw4x45 @=@ https://ror.org/02wxw4x45
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01j6drw72 @=@ https://ror.org/01j6drw72
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05kacka20 @=@ https://ror.org/05kacka20
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00n8ttd98 @=@ https://ror.org/00n8ttd98
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0013zhk30 @=@ https://ror.org/0013zhk30
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03x7xkr71 @=@ https://ror.org/03x7xkr71
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05w9g2j85 @=@ https://ror.org/05w9g2j85
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/032tyv240 @=@ https://ror.org/032tyv240
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02hdf6119 @=@ https://ror.org/02hdf6119
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01yg57d71 @=@ https://ror.org/01yg57d71
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/021z1mz76 @=@ https://ror.org/021z1mz76
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01wqae691 @=@ https://ror.org/01wqae691
cnr:institutes @=@ cnr:institutes @=@ __CDS084__ @=@ __CDS084__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00awwz417 @=@ https://ror.org/00awwz417
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03a111314 @=@ https://ror.org/03a111314
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01zz9wh30 @=@ https://ror.org/01zz9wh30
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/035y5td47 @=@ https://ror.org/035y5td47
cnr:institutes @=@ cnr:institutes @=@ __CDS089__ @=@ __CDS089__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/051t1q308 @=@ https://ror.org/051t1q308
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05k3cs357 @=@ https://ror.org/05k3cs357
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02gcxw165 @=@ https://ror.org/02gcxw165
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05db0es39 @=@ https://ror.org/05db0es39
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/029k6t707 @=@ https://ror.org/029k6t707
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00w6r1881 @=@ https://ror.org/00w6r1881
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01mfmr054 @=@ https://ror.org/01mfmr054
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/058nrs650 @=@ https://ror.org/058nrs650
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0331xj092 @=@ https://ror.org/0331xj092
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/052q58629 @=@ https://ror.org/052q58629
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04ehykb85 @=@ https://ror.org/04ehykb85
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0221agg28 @=@ https://ror.org/0221agg28
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02xz4xc25 @=@ https://ror.org/02xz4xc25
cnr:institutes @=@ cnr:institutes @=@ __CDS103__ @=@ __CDS103__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01as2bh37 @=@ https://ror.org/01as2bh37
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01y5w6t76 @=@ https://ror.org/01y5w6t76
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04xy2mq71 @=@ https://ror.org/04xy2mq71
cnr:institutes @=@ cnr:institutes @=@ __CDS107__ @=@ __CDS107__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/013nxtf56 @=@ https://ror.org/013nxtf56
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05rcgef49 @=@ https://ror.org/05rcgef49
cnr:institutes @=@ cnr:institutes @=@ __CDS110__ @=@ __CDS110__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02dp3a879 @=@ https://ror.org/02dp3a879
cnr:institutes @=@ cnr:institutes @=@ __CDS112__ @=@ __CDS112__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00p03yg71 @=@ https://ror.org/00p03yg71
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00yfw2296 @=@ https://ror.org/00yfw2296
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0042e5975 @=@ https://ror.org/0042e5975
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03ta8pf33 @=@ https://ror.org/03ta8pf33
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/040xhth73 @=@ https://ror.org/040xhth73
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02qnx8e75 @=@ https://ror.org/02qnx8e75
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05nr7xa08 @=@ https://ror.org/05nr7xa08
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/008fjbg42 @=@ https://ror.org/008fjbg42
cnr:institutes @=@ cnr:institutes @=@ __CDS122__ @=@ __CDS122__
cnr:institutes @=@ cnr:institutes @=@ __CDS123__ @=@ __CDS123__
cnr:institutes @=@ cnr:institutes @=@ __CDS124__ @=@ __CDS124__
cnr:institutes @=@ cnr:institutes @=@ __CDS125__ @=@ __CDS125__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03a0vt050 @=@ https://ror.org/03a0vt050
cnr:institutes @=@ cnr:institutes @=@ __CDS127__ @=@ __CDS127__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03byxpq91 @=@ https://ror.org/03byxpq91
cnr:institutes @=@ cnr:institutes @=@ __CDS129__ @=@ __CDS129__
cnr:institutes @=@ cnr:institutes @=@ __CDS130__ @=@ __CDS130__
cnr:institutes @=@ cnr:institutes @=@ __CDS131__ @=@ __CDS131__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0263zy895 @=@ https://ror.org/0263zy895
cnr:institutes @=@ cnr:institutes @=@ __CDS133__ @=@ __CDS133__

View File

@@ -498,9 +498,6 @@ class CrossrefMappingTest {
assertNotNull(pub.getJournal.getIssnOnline)
assertNotNull(pub.getJournal.getName)
}
@Test

View File

@@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
@@ -102,6 +103,7 @@ public class SparkBulkTagJob {
ResultTagger resultTagger = new ResultTagger();
readPath(spark, inputPath, resultClazz)
.map(patchResult(), Encoders.bean(resultClazz))
.filter(Objects::nonNull)
.map(
(MapFunction<R, R>) value -> resultTagger
.enrichContextCriteria(
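
The functional change in this hunk is the `.filter(Objects::nonNull)` inserted between `patchResult()` and the tagging step: if patching maps a record to null, the null would otherwise reach `enrichContextCriteria` and fail there. The same pattern on a plain dataset, as a standalone sketch that does not use the project's classes:

```java
import java.util.Arrays;
import java.util.Objects;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class NullSafePipeline {
	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate();

		Dataset<String> input = spark
			.createDataset(Arrays.asList("ok-1", "drop-me", "ok-2"), Encoders.STRING());

		Dataset<String> tagged = input
			// the "patch" step may map a record to null...
			.map((MapFunction<String, String>) s -> s.startsWith("drop") ? null : s, Encoders.STRING())
			// ...so nulls are dropped before the enrichment step sees them
			.filter((FilterFunction<String>) Objects::nonNull)
			.map((MapFunction<String, String>) s -> s + ":tagged", Encoders.STRING());

		tagged.show(); // ok-1:tagged, ok-2:tagged
		spark.stop();
	}
}
```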

View File

@@ -10,6 +10,7 @@ import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
@@ -81,8 +82,9 @@ public class SparkPrepareResultProject implements Serializable {
Dataset<Relation> relation = Utils
.readPath(spark, inputPath + "/relation", Relation.class)
.filter(
"dataInfo.deletedbyinference = false and lower(relClass) = '"
+ ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'");
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PRODUCED_BY));
Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);


@ -7,17 +7,22 @@ import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
/**
@ -33,87 +38,83 @@ public class SparkDumpFunderResults implements Serializable {
SparkDumpFunderResults.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String graphPath = parser.get("graphPath");
log.info("relationPath: {}", graphPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
writeResultProjectList(spark, inputPath, outputPath, graphPath);
writeResultProjectList(spark, inputPath, outputPath);
});
}
private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath,
String graphPath) {
Dataset<eu.dnetlib.dhp.schema.oaf.Project> project = Utils
.readPath(spark, graphPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);
private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath) {
Dataset<CommunityResult> result = Utils
.readPath(spark, inputPath + "/publication", CommunityResult.class)
.union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
.union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class))
.union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class))
.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
List<String> funderList = project
.select("id")
.map((MapFunction<Row, String>) value -> value.getString(0).substring(0, 15), Encoders.STRING())
.distinct()
.collectAsList();
log.info("Number of result {}", result.count());
Dataset<String> tmp = result
.flatMap((FlatMapFunction<CommunityResult, String>) cr -> cr.getProjects().stream().map(p -> {
return getFunderName(p);
}).collect(Collectors.toList()).iterator(), Encoders.STRING())
.distinct();
List<String> funderList = tmp.collectAsList();
funderList.forEach(funder -> {
String fundernsp = funder.substring(3);
String funderdump;
if (fundernsp.startsWith("corda")) {
funderdump = "EC_";
if (fundernsp.endsWith("h2020")) {
funderdump += "H2020";
} else {
funderdump += "FP7";
}
} else {
funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase();
}
writeFunderResult(funder, result, outputPath, funderdump);
dumpResults(funder, result, outputPath);
});
}
private static void dumpResults(String nsp, Dataset<CommunityResult> results, String outputPath,
String funderName) {
@NotNull
private static String getFunderName(Project p) {
Optional<Funder> ofunder = Optional.ofNullable(p.getFunder());
if (ofunder.isPresent()) {
String fName = ofunder.get().getShortName();
if (fName.equalsIgnoreCase("ec")) {
fName += "_" + ofunder.get().getFundingStream();
}
return fName;
} else {
String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
if (fName.equalsIgnoreCase("ec")) {
if (p.getId().contains("h2020")) {
fName += "_H2020";
} else {
fName += "_FP7";
}
} else if (fName.equalsIgnoreCase("conicytf")) {
fName = "CONICYT";
} else if (fName.equalsIgnoreCase("dfgf")) {
fName = "DFG";
} else if (fName.equalsIgnoreCase("tubitakf")) {
fName = "TUBITAK";
} else if (fName.equalsIgnoreCase("euenvagency")) {
fName = "EEA";
}
return fName;
}
}
private static void dumpResults(String funder, Dataset<CommunityResult> results, String outputPath) {
results.map((MapFunction<CommunityResult, CommunityResult>) r -> {
if (!Optional.ofNullable(r.getProjects()).isPresent()) {
return null;
}
for (Project p : r.getProjects()) {
if (p.getId().startsWith(nsp)) {
if (nsp.startsWith("40|irb")) {
if (p.getFunder().getShortName().equals(funderName))
return r;
else
return null;
}
String fName = getFunderName(p);
if (fName.equalsIgnoreCase(funder)) {
return r;
}
}
@ -123,18 +124,6 @@ public class SparkDumpFunderResults implements Serializable {
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/" + funderName);
.json(outputPath + "/" + funder);
}
private static void writeFunderResult(String funder, Dataset<CommunityResult> results, String outputPath,
String funderDump) {
if (funder.startsWith("40|irb")) {
dumpResults(funder, results, outputPath, "HRZZ");
dumpResults(funder, results, outputPath, "MZOS");
} else
dumpResults(funder, results, outputPath, funderDump);
}
}
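getFunderName above normalizes funder identifiers either from the funder shortName or, when the funder is missing, from the project id namespace. A few illustrative input/output pairs for the id-derived branch, mirrored locally for demonstration (hypothetical inputs, not fixtures from the codebase):

public class FunderNameExamples {

    // local mirror of the id-derived normalization branch of getFunderName,
    // for illustration only
    static String normalize(String shortName) {
        switch (shortName.toLowerCase()) {
            case "conicytf":
                return "CONICYT";
            case "dfgf":
                return "DFG";
            case "tubitakf":
                return "TUBITAK";
            case "euenvagency":
                return "EEA";
            default:
                return shortName.toUpperCase();
        }
    }

    public static void main(String[] args) {
        System.out.println(normalize("conicytf"));  // CONICYT
        System.out.println(normalize("tubitakf"));  // TUBITAK
        System.out.println(normalize("nsf"));       // NSF (pass-through, upper-cased)
    }
}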


@ -5,9 +5,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
@ -18,11 +21,18 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
* Selects the results linked to projects; the dump will be performed only for these results.
@ -58,8 +68,10 @@ public class SparkResultLinkedToProject implements Serializable {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String graphPath = parser.get("graphPath");
log.info("graphPath: {}", graphPath);
final String resultProjectsPath = parser.get("graphPath");
log.info("graphPath: {}", resultProjectsPath);
String communityMapPath = parser.get("communityMapPath");
@SuppressWarnings("unchecked")
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
@ -70,43 +82,33 @@ public class SparkResultLinkedToProject implements Serializable {
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
writeResultsLinkedToProjects(spark, inputClazz, inputPath, outputPath, graphPath);
writeResultsLinkedToProjects(
communityMapPath, spark, inputClazz, inputPath, outputPath, resultProjectsPath);
});
}
private static <R extends Result> void writeResultsLinkedToProjects(SparkSession spark, Class<R> inputClazz,
String inputPath, String outputPath, String graphPath) {
private static <R extends Result> void writeResultsLinkedToProjects(String communityMapPath, SparkSession spark,
Class<R> inputClazz,
String inputPath, String outputPath, String resultProjectsPath) {
Dataset<R> results = Utils
.readPath(spark, inputPath, inputClazz)
.filter("dataInfo.deletedbyinference = false and datainfo.invisible = false");
Dataset<Relation> relations = Utils
.readPath(spark, graphPath + "/relation", Relation.class)
.filter(
"dataInfo.deletedbyinference = false and lower(relClass) = '"
+ ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'");
Dataset<Project> project = Utils.readPath(spark, graphPath + "/project", Project.class);
results.createOrReplaceTempView("result");
relations.createOrReplaceTempView("relation");
project.createOrReplaceTempView("project");
Dataset<R> tmp = spark
.sql(
"Select res.* " +
"from relation rel " +
"join result res " +
"on rel.source = res.id " +
"join project p " +
"on rel.target = p.id " +
"")
.as(Encoders.bean(inputClazz));
tmp
.groupByKey(
(MapFunction<R, String>) value -> value
.getId(),
Encoders.STRING())
.mapGroups((MapGroupsFunction<String, R, R>) (k, it) -> it.next(), Encoders.bean(inputClazz))
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
!r.getDataInfo().getInvisible());
Dataset<ResultProject> resultProjectDataset = Utils
.readPath(spark, resultProjectsPath, ResultProject.class);
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
results
.joinWith(resultProjectDataset, results.col("id").equalTo(resultProjectDataset.col("resultId")))
.map((MapFunction<Tuple2<R, ResultProject>, CommunityResult>) t2 -> {
CommunityResult cr = (CommunityResult) ResultMapper
.map(
t2._1(),
communityMap, Constants.DUMPTYPE.FUNDER.getType());
cr.setProjects(t2._2().getProjectsList());
return cr;
}, Encoders.bean(CommunityResult.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")


@ -0,0 +1,82 @@
package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
import scala.Tuple2;
public class ProjectsSubsetSparkJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ProjectsSubsetSparkJob.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
ProjectsSubsetSparkJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String projectListPath = parser.get("projectListPath");
log.info("projectListPath: {}", projectListPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
getNewProjectList(spark, inputPath, outputPath, projectListPath);
});
}
private static void getNewProjectList(SparkSession spark, String inputPath, String outputPath,
String projectListPath) {
Dataset<String> projectList = spark.read().textFile(projectListPath);
Dataset<Project> projects;
projects = Utils.readPath(spark, inputPath, Project.class);
projects
.joinWith(projectList, projects.col("id").equalTo(projectList.col("value")), "left")
.map((MapFunction<Tuple2<Project, String>, Project>) t2 -> {
if (Optional.ofNullable(t2._2()).isPresent())
return null;
return t2._1();
}, Encoders.bean(Project.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
Utils
.readPath(spark, outputPath, Project.class)
.map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING())
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.text(projectListPath);
}
}
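The left join plus null check above keeps only the projects whose id is not yet in the project list, and the second write appends the freshly dumped ids back to that list. An equivalent formulation of the selection using a left_anti join, as a minimal sketch (Project stands for the dump schema class used by the job, and "value" is the default column name of a Dataset<String> read via textFile):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;

import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;

public class ProjectsSubsetSketch {

    // left_anti keeps the projects whose id is absent from the already-dumped list
    public static Dataset<Project> newProjects(Dataset<Project> projects, Dataset<String> dumpedIds) {
        return projects
            .join(dumpedIds, projects.col("id").equalTo(dumpedIds.col("value")), "left_anti")
            .as(Encoders.bean(Project.class));
    }
}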


@ -17,10 +17,10 @@
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "gp",
"paramLongName": "graphPath",
"paramDescription": "the relationPath",
"paramRequired": true
}
{
"paramName": "gp",
"paramLongName": "graphPath",
"paramDescription": "the relationPath",
"paramRequired": false
}
]


@ -0,0 +1,20 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]


@ -28,6 +28,12 @@
"paramLongName":"graphPath",
"paramDescription": "the path to the relations",
"paramRequired": true
},
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the relations",
"paramRequired": true
}
]


@ -0,0 +1,27 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "pl",
"paramLongName": "projectListPath",
"paramDescription": "the path of the association result projectlist",
"paramRequired": true
}
]


@ -0,0 +1,171 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>projectListPath</name>
<description>the path to the project list</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>accessToken</name>
<description>the access token used for the deposition in Zenodo</description>
</property>
<property>
<name>connectionUrl</name>
<description>the connection url for Zenodo</description>
</property>
<property>
<name>metadata</name>
<description> the metadata associated to the deposition</description>
</property>
<property>
<name>depositionType</name>
<description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
</property>
<property>
<name>conceptRecordId</name>
<description>for new version, the id of the record for the old deposition</description>
</property>
<property>
<name>depositionId</name>
<description>the depositionId of a deposition open that has to be added content</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="dump_project"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="dump_project">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table project </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${workingDir}/project</arg>
<arg>--communityMapPath</arg><arg>noneed</arg>
</spark>
<ok to="get_new_projects"/>
<error to="Kill"/>
</action>
<action name="get_new_projects">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table project </name>
<class>eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/project</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/project</arg>
<arg>--projectListPath</arg><arg>${projectListPath}</arg>
</spark>
<ok to="make_archive"/>
<error to="Kill"/>
</action>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
</java>
<ok to="send_zenodo"/>
<error to="Kill"/>
</action>
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
<arg>--metadata</arg><arg>${metadata}</arg>
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
<arg>--depositionType</arg><arg>${depositionType}</arg>
<arg>--depositionId</arg><arg>${depositionId}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>


@ -1,347 +0,0 @@
<workflow-app name="sub_dump_community_funder_results" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>communityMapPath</name>
<description>the path to the community map</description>
</property>
<property>
<name>selectedResults</name>
<description>the path the the possible subset ot results to be dumped</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="fork_dump"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<fork name="fork_dump">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_orp"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${selectedResults}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--dumpType</arg><arg>${dumpType}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${selectedResults}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${selectedResults}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${selectedResults}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="prepareResultProject"/>
<action name="prepareResultProject">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_extendWithProject"/>
<error to="Kill"/>
</action>
<fork name="fork_extendWithProject">
<path start="extend_publication"/>
<path start="extend_dataset"/>
<path start="extend_orp"/>
<path start="extend_software"/>
</fork>
<action name="extend_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped publications with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/ext/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped dataset with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/ext/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped ORP with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/ext/orp</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped software with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/ext/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<join name="join_extend" to="End"/>
<end name="End"/>
</workflow-app>


@ -1,2 +0,0 @@
## This is a classpath-based import file (this header is required)
dump_common classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app


@ -77,42 +77,259 @@
</configuration>
</global>
<start to="common_action_community_funder"/>
<start to="fork_dump"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="common_action_community_funder">
<sub-workflow>
<app-path>${wf:appPath()}/dump_common
</app-path>
<propagate-configuration/>
<configuration>
<property>
<name>sourcePath</name>
<value>${sourcePath}</value>
</property>
<property>
<name>selectedResults</name>
<value>${sourcePath}</value>
</property>
<property>
<name>communityMapPath</name>
<value>${workingDir}/communityMap</value>
</property>
<property>
<name>outputPath</name>
<value>${workingDir}</value>
</property>
</configuration>
</sub-workflow>
<ok to="splitForCommunities" />
<error to="Kill" />
<fork name="fork_dump">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_orp"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--dumpType</arg><arg>${dumpType}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="prepareResultProject"/>
<action name="prepareResultProject">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_extendWithProject"/>
<error to="Kill"/>
</action>
<fork name="fork_extendWithProject">
<path start="extend_publication"/>
<path start="extend_dataset"/>
<path start="extend_orp"/>
<path start="extend_software"/>
</fork>
<action name="extend_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped publications with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped dataset with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped ORP with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped software with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
<arg>--outputPath</arg><arg>${workingDir}/ext/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<join name="join_extend" to="splitForCommunities"/>
<action name="splitForCommunities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>


@ -298,6 +298,7 @@
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/validrelation</arg>


@ -1,2 +0,0 @@
## This is a classpath-based import file (this header is required)
dump_common classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app


@ -77,12 +77,36 @@
</configuration>
</global>
<start to="fork_result_linked_to_projects"/>
<start to="prepareResultProject"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="prepareResultProject">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_result_linked_to_projects"/>
<error to="Kill"/>
</action>
<fork name="fork_result_linked_to_projects">
<path start="select_publication_linked_to_projects"/>
@ -111,7 +135,8 @@
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
<arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_link"/>
<error to="Kill"/>
@ -137,7 +162,8 @@
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
<arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_link"/>
<error to="Kill"/>
@ -163,7 +189,8 @@
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
<arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_link"/>
<error to="Kill"/>
@ -189,41 +216,14 @@
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
<arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_link"/>
<error to="Kill"/>
</action>
<join name="join_link" to="common_action_community_funder"/>
<action name="common_action_community_funder">
<sub-workflow>
<app-path>${wf:appPath()}/dump_common
</app-path>
<propagate-configuration/>
<configuration>
<property>
<name>sourcePath</name>
<value>${sourcePath}</value>
</property>
<property>
<name>selectedResults</name>
<value>${workingDir}/result</value>
</property>
<property>
<name>communityMapPath</name>
<value>${workingDir}/communityMap</value>
</property>
<property>
<name>outputPath</name>
<value>${workingDir}</value>
</property>
</configuration>
</sub-workflow>
<ok to="dump_funder_results" />
<error to="Kill" />
</action>
<join name="join_link" to="dump_funder_results"/>
<action name="dump_funder_results">
<spark xmlns="uri:oozie:spark-action:0.2">
@ -242,9 +242,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
<arg>--sourcePath</arg><arg>${workingDir}/result</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--graphPath</arg><arg>${sourcePath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>


@ -1,5 +1,6 @@
[
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true},
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true}
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true},
{"paramName":"r", "paramLongName":"filterRelation", "paramDescription": "the relation to filter", "paramRequired": false}
]


@ -3,5 +3,7 @@
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true},
{"paramName":"su", "paramLongName":"scholixUpdatePath", "paramDescription": "the scholix updated Path", "paramRequired": false},
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true},
{"paramName":"o", "paramLongName":"objectType", "paramDescription": "should be scholix or Summary", "paramRequired": true}
{"paramName":"o", "paramLongName":"objectType", "paramDescription": "should be scholix or Summary", "paramRequired": true},
{"paramName":"mp", "paramLongName":"maxPidNumberFilter", "paramDescription": "filter max number of pids in source/target", "paramRequired": false}
]


@ -0,0 +1,10 @@
<configuration>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>


@ -0,0 +1,145 @@
<workflow-app name="Create Scholix Dump" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the working dir base path</description>
</property>
<property>
<name>targetPath</name>
<description>the final graph path</description>
</property>
<property>
<name>relationFilter</name>
<description>Filter relation semantic</description>
</property>
<property>
<name>maxNumberOfPid</name>
<description>filter relation with at least #maxNumberOfPid</description>
</property>
</parameters>
<start to="ImportDatasetEntities"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ImportDatasetEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Import JSONRDD to Dataset kryo</name>
<class>eu.dnetlib.dhp.sx.graph.SparkConvertRDDtoDataset</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.shuffle.partitions=3000
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--master</arg><arg>yarn</arg>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--targetPath</arg><arg>${targetPath}</arg>
<arg>--filterRelation</arg><arg>${relationFilter}</arg>
</spark>
<ok to="CreateSummaries"/>
<error to="Kill"/>
</action>
<action name="CreateSummaries">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Convert Entities to summaries</name>
<class>eu.dnetlib.dhp.sx.graph.SparkCreateSummaryObject</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.shuffle.partitions=20000
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--master</arg><arg>yarn</arg>
<arg>--sourcePath</arg><arg>${targetPath}/entities</arg>
<arg>--targetPath</arg><arg>${targetPath}/provision/summaries</arg>
</spark>
<ok to="CreateScholix"/>
<error to="Kill"/>
</action>
<action name="CreateScholix">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Generate Scholix Dataset</name>
<class>eu.dnetlib.dhp.sx.graph.SparkCreateScholix</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.shuffle.partitions=30000
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--master</arg><arg>yarn</arg>
<arg>--summaryPath</arg><arg>${targetPath}/provision/summaries</arg>
<arg>--targetPath</arg><arg>${targetPath}/provision/scholix</arg>
<arg>--relationPath</arg><arg>${targetPath}/relation</arg>
</spark>
<ok to="DropJSONPath"/>
<error to="Kill"/>
</action>
<action name="DropJSONPath">
<fs>
<delete path='${targetPath}/json'/>
<mkdir path='${targetPath}/json/'/>
</fs>
<ok to="SerializeScholix"/>
<error to="Kill"/>
</action>
<action name="SerializeScholix">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Serialize scholix to JSON</name>
<class>eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.shuffle.partitions=6000
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--master</arg><arg>yarn</arg>
<arg>--sourcePath</arg><arg>${targetPath}/provision/scholix/scholix</arg>
<arg>--targetPath</arg><arg>${targetPath}/json/scholix_json</arg>
<arg>--objectType</arg><arg>scholix</arg>
<arg>--maxPidNumberFilter</arg><arg>maxNumberOfPid</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>


@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.sx.scholix.Scholix
import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary
import eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson.toInt
import org.apache.commons.io.IOUtils
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.spark.SparkConf
@ -12,6 +13,14 @@ import org.slf4j.{Logger, LoggerFactory}
object SparkConvertObjectToJson {
def toInt(s: String): Option[Int] = {
try {
Some(s.toInt)
} catch {
case e: Exception => None
}
}
def main(args: Array[String]): Unit = {
val log: Logger = LoggerFactory.getLogger(getClass)
val conf: SparkConf = new SparkConf()
@ -37,6 +46,8 @@ object SparkConvertObjectToJson {
log.info(s"objectType -> $objectType")
val scholixUpdatePath = parser.get("scholixUpdatePath")
log.info(s"scholixUpdatePath -> $scholixUpdatePath")
val maxPidNumberFilter = parser.get("maxPidNumberFilter")
log.info(s"maxPidNumberFilter -> $maxPidNumberFilter")
implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
@ -47,12 +58,22 @@ object SparkConvertObjectToJson {
case "scholix" =>
log.info("Serialize Scholix")
val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix]
val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix]
d.union(u)
.repartition(8000)
.map(s => mapper.writeValueAsString(s))(Encoders.STRING)
.rdd
.saveAsTextFile(targetPath, classOf[GzipCodec])
// val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix]
if (maxPidNumberFilter != null && toInt(maxPidNumberFilter).isDefined) {
val mp = toInt(maxPidNumberFilter).get
d
.filter(s => (s.getSource.getIdentifier.size() <= mp) && (s.getTarget.getIdentifier.size() <= mp))
.map(s => mapper.writeValueAsString(s))(Encoders.STRING)
.rdd
.saveAsTextFile(targetPath, classOf[GzipCodec])
} else {
d
.repartition(8000)
.map(s => mapper.writeValueAsString(s))(Encoders.STRING)
.rdd
.saveAsTextFile(targetPath, classOf[GzipCodec])
}
case "summary" =>
log.info("Serialize Summary")
val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary]
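
The new branch keeps a Scholix record only when both its source and target carry at most maxPidNumberFilter identifiers, and it skips the repartition(8000) step in that case. A self-contained sketch of the predicate, with End/Link as illustrative stand-ins for the Scholix schema types:

// End/Link are stand-ins for the Scholix source/target types, not the real schema.
case class End(identifiers: List[String])
case class Link(source: End, target: End)

object PidFilterSketch {
  // Keep a record only if both ends carry at most `mp` identifiers.
  def keep(l: Link, mp: Int): Boolean =
    l.source.identifiers.size <= mp && l.target.identifiers.size <= mp

  def main(args: Array[String]): Unit = {
    val small = Link(End(List("doi:a")), End(List("doi:b", "pmid:c")))
    val large = Link(End(List.fill(10)("pid")), End(List("doi:b")))
    println(keep(small, 5)) // true  -> serialized
    println(keep(large, 5)) // false -> dropped
  }
}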

View File

@ -4,9 +4,11 @@ import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset}
import org.apache.commons.io.IOUtils
import org.apache.commons.lang3.StringUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
object SparkConvertRDDtoDataset {
@ -34,6 +36,9 @@ object SparkConvertRDDtoDataset {
val t = parser.get("targetPath")
log.info(s"targetPath -> $t")
val filterRelation = parser.get("filterRelation")
log.info(s"filterRelation -> $filterRelation")
val entityPath = s"$t/entities"
val relPath = s"$t/relation"
val mapper = new ObjectMapper()
@ -94,28 +99,44 @@ object SparkConvertRDDtoDataset {
log.info("Converting Relation")
val relationSemanticFilter = List(
// "cites",
// "iscitedby",
"merges",
"ismergedin",
"HasAmongTopNSimilarDocuments",
"IsAmongTopNSimilarDocuments"
)
if (StringUtils.isNotBlank(filterRelation)) {
val rddRelation = spark.sparkContext
.textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
//filter OpenCitations relations
.filter(r =>
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
"opencitations".equalsIgnoreCase(k.getValue)
val rddRelation = spark.sparkContext
.textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
//filter OpenCitations relations
.filter(r =>
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
"opencitations".equalsIgnoreCase(k.getValue)
)
)
.filter(r => r.getSubRelType != null && r.getSubRelType.equalsIgnoreCase(filterRelation))
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
} else {
val relationSemanticFilter = List(
"merges",
"ismergedin",
"HasAmongTopNSimilarDocuments",
"IsAmongTopNSimilarDocuments"
)
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
val rddRelation = spark.sparkContext
.textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
//filter OpenCitations relations
.filter(r =>
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
"opencitations".equalsIgnoreCase(k.getValue)
)
)
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
}
}
}
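
The net effect of this change: when filterRelation is non-blank, only relations of that subRelType survive; otherwise the previous semantic blacklist applies. A compact sketch of the selection logic, with Rel as an illustrative stand-in for eu.dnetlib.dhp.schema.oaf.Relation:

// Rel is a stand-in for the Relation entity, reduced to the two fields used here.
case class Rel(subRelType: String, relClass: String)

object RelationFilterSketch {
  val semanticBlacklist = List(
    "merges", "ismergedin",
    "HasAmongTopNSimilarDocuments", "IsAmongTopNSimilarDocuments")

  def keep(r: Rel, filterRelation: Option[String]): Boolean = filterRelation match {
    // Explicit mode: keep only relations of the requested subRelType.
    case Some(f) if f.trim.nonEmpty => f.equalsIgnoreCase(r.subRelType)
    // Default mode: drop the semantic blacklist, keep everything else.
    case _ => !semanticBlacklist.exists(_.equalsIgnoreCase(r.relClass))
  }

  def main(args: Array[String]): Unit = {
    println(keep(Rel("citation", "Cites"), Some("citation"))) // true
    println(keep(Rel("dedup", "merges"), None))               // false
  }
}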

View File

@ -321,4 +321,27 @@ public class PrepareResultProjectJobTest {
3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());
}
@Test
void testMatchx() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match")
.getPath();
SparkPrepareResultProject.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/preparedInfo",
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<ResultProject> tmp = sc
.textFile(workingDir.toString() + "/preparedInfo")
.map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));
tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
}
}

View File

@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
@ -76,7 +77,11 @@ public class ResultLinkedToProjectTest {
.getPath();
final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch")
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
.getPath();
SparkResultLinkedToProject.main(new String[] {
@ -84,20 +89,18 @@ public class ResultLinkedToProjectTest {
"-outputPath", workingDir.toString() + "/preparedInfo",
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-graphPath", graphPath
"-graphPath", graphPath,
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Result> tmp = sc
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/preparedInfo")
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<Result> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Result.class));
Assertions.assertEquals(0, verificationDataset.count());
Assertions.assertEquals(0, tmp.count());
}
@ -108,8 +111,12 @@ public class ResultLinkedToProjectTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json")
.getPath();
final String relationPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match")
final String graphPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
.getPath();
SparkResultLinkedToProject.main(new String[] {
@ -117,20 +124,18 @@ public class ResultLinkedToProjectTest {
"-outputPath", workingDir.toString() + "/preparedInfo",
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-graphPath", relationPath
"-graphPath", graphPath,
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Publication> tmp = sc
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/preparedInfo")
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<Publication> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, tmp.count());
}

View File

@ -5,10 +5,14 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
// import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults2;
// import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkGetFunderList;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
@ -68,20 +72,19 @@ public class SplitPerFunderTest {
void test1() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump")
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext")
.getPath();
SparkDumpFunderResults.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/split",
"-sourcePath", sourcePath,
"-graphPath", sourcePath
"-sourcePath", sourcePath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
// FP7 3
// FP7 3 and H2020 3
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/split/EC_FP7")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
@ -143,11 +146,6 @@ public class SplitPerFunderTest {
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
Assertions.assertEquals(1, tmp.count());
// CONICYT 0
tmp = sc
.textFile(workingDir.toString() + "/split/CONICYTF")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
Assertions.assertEquals(0, tmp.count());
}
}

View File

@ -0,0 +1,125 @@
package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
public class ProjectSubsetTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectSubsetTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(
eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectSubsetTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectSubsetTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectSubsetTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void testAllNew() throws Exception {
final String projectListPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
.getPath();
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/allnew/projects")
.getPath();
spark
.read()
.textFile(projectListPath)
.write()
.mode(SaveMode.Overwrite)
.text(workingDir.toString() + "/projectIds");
ProjectsSubsetSparkJob.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/projects",
"-sourcePath", sourcePath,
"-projectListPath", workingDir.toString() + "/projectIds"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Project> tmp = sc
.textFile(workingDir.toString() + "/projects")
.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
Assertions.assertEquals(12, tmp.count());
Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("aka_________")).count());
Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("anr_________")).count());
Assertions.assertEquals(4, tmp.filter(p -> p.getId().substring(3, 15).equals("arc_________")).count());
Assertions.assertEquals(3, tmp.filter(p -> p.getId().substring(3, 15).equals("conicytf____")).count());
Assertions.assertEquals(1, tmp.filter(p -> p.getId().substring(3, 15).equals("corda_______")).count());
Assertions.assertEquals(40, sc.textFile(workingDir.toString() + "/projectIds").count());
}
@Test
void testMatchOne() throws Exception {
final String projectListPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
.getPath();
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/matchOne/projects")
.getPath();
spark
.read()
.textFile(projectListPath)
.write()
.mode(SaveMode.Overwrite)
.text(workingDir.toString() + "/projectIds");
ProjectsSubsetSparkJob.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/projects",
"-sourcePath", sourcePath,
"-projectListPath", workingDir.toString() + "/projectIds"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Project> tmp = sc
.textFile(workingDir.toString() + "/projects")
.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
Assertions.assertEquals(11, tmp.count());
Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("aka_________")).count());
Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("anr_________")).count());
Assertions.assertEquals(4, tmp.filter(p -> p.getId().substring(3, 15).equals("arc_________")).count());
Assertions.assertEquals(3, tmp.filter(p -> p.getId().substring(3, 15).equals("conicytf____")).count());
Assertions.assertEquals(0, tmp.filter(p -> p.getId().substring(3, 15).equals("corda__h2020")).count());
Assertions.assertEquals(39, sc.textFile(workingDir.toString() + "/projectIds").count());
}
}
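
From the assertions above one can infer the intended contract of ProjectsSubsetSparkJob: dump only projects whose id is not yet in projectListPath, and append the newly dumped ids to that list (28 known ids plus 12 new ones yield 40; a single match leaves 11 dumped and 39 listed). A sketch of that contract on plain collections, not the actual Spark job:

object ProjectsSubsetSketch {
  // projects: (id, serialized json); knownIds: ids already dumped in earlier runs.
  def subset(projects: List[(String, String)],
             knownIds: Set[String]): (List[(String, String)], Set[String]) = {
    val fresh = projects.filterNot { case (id, _) => knownIds.contains(id) }
    (fresh, knownIds ++ fresh.map(_._1)) // dump the new ones, extend the id list
  }

  def main(args: Array[String]): Unit = {
    val (dumped, ids) = subset(List("p1" -> "{}", "p2" -> "{}"), Set("p2"))
    println(dumped.map(_._1)) // List(p1)
    println(ids.size)         // 2
  }
}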

View File

@ -0,0 +1 @@
{"ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,8 @@
NSF
CIHR
NWO
NHMRC
NIH
MZOS
SNSF
EC

View File

@ -0,0 +1 @@
{"resultId":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","projectsList":[{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"validated":null},{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"validated":null}]}

View File

@ -0,0 +1,12 @@
{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","websiteurl":null,"code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","startdate":null,"enddate":null,"callidentifier":"Fotoniikka ja modernit kuvantamismenetelmät LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","websiteurl":null,"code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","startdate":null,"enddate":null,"callidentifier":"Academy Project Funding TT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","websiteurl":null,"code":"ANR-17-CE05-0033","acronym":"MOISE","title":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::547e78ffdcb7d72a1ef31058dede3a33","websiteurl":null,"code":"ANR-09-SEGI-0005","acronym":"GALAXY","title":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE PAR LES MODELES","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::838e781a8d479e27a11101421fd8b296","websiteurl":"http://purl.org/au-research/grants/arc/LE0347462","code":"LE0347462","acronym":null,"title":"Femtosecond laser micromachining facility","startdate":"2003-01-01","enddate":"2003-12-31","callidentifier":null,"keywords":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::a461f180f7b6700c0499d4d3d53e58c7","websiteurl":"http://purl.org/au-research/grants/arc/LP140100567","code":"LP140100567","acronym":null,"title":"Linkage Projects - Grant ID: LP140100567","startdate":"2014-01-01","enddate":"2017-12-31","callidentifier":null,"keywords":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Projects","description":"Linkage Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","websiteurl":"http://purl.org/au-research/grants/arc/DP180101235","code":"DP180101235","acronym":null,"title":"Discovery Projects - Grant ID: DP180101235","startdate":"2018-01-01","enddate":"2023-12-31","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Discovery Projects","description":"Discovery Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","websiteurl":"http://purl.org/au-research/grants/arc/LE0989831","code":"LE0989831","acronym":null,"title":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape","startdate":"2009-01-01","enddate":"2009-12-31","callidentifier":null,"keywords":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","websiteurl":"http://repositorio.conicyt.cl/handle/10533/183109","code":"3120023","acronym":null,"title":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS","startdate":"2011-01-01","enddate":"2014-01-28","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::POSTDOCTORADO","description":"Fondecyt fundings - Fondecyt stream, POSTDOCTORADO"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","websiteurl":"http://repositorio.conicyt.cl/handle/10533/163340","code":"1040240","acronym":null,"title":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS.","startdate":"2004-01-15","enddate":"2007-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","websiteurl":"http://repositorio.conicyt.cl/handle/10533/162452","code":"1020683","acronym":null,"title":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION.","startdate":"2002-01-15","enddate":"2006-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|corda_______::132bac68f17bb81c451d9071be6e4d6d","websiteurl":null,"code":"628405","acronym":"ANIM","title":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers","startdate":"2014-05-01","enddate":"2016-04-30","callidentifier":"FP7-PEOPLE-2013-IIF","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"EC","name":"European Commission","jurisdiction":"EU","funding_stream":{"id":"EC::FP7::SP3::PEOPLE","description":"SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions"}}],"summary":null,"granted":null,"h2020programme":[]}

View File

@ -0,0 +1,12 @@
{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","websiteurl":null,"code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","startdate":null,"enddate":null,"callidentifier":"Fotoniikka ja modernit kuvantamismenetelmät LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","websiteurl":null,"code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","startdate":null,"enddate":null,"callidentifier":"Academy Project Funding TT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","websiteurl":null,"code":"ANR-17-CE05-0033","acronym":"MOISE","title":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::547e78ffdcb7d72a1ef31058dede3a33","websiteurl":null,"code":"ANR-09-SEGI-0005","acronym":"GALAXY","title":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE PAR LES MODELES","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::838e781a8d479e27a11101421fd8b296","websiteurl":"http://purl.org/au-research/grants/arc/LE0347462","code":"LE0347462","acronym":null,"title":"Femtosecond laser micromachining facility","startdate":"2003-01-01","enddate":"2003-12-31","callidentifier":null,"keywords":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::a461f180f7b6700c0499d4d3d53e58c7","websiteurl":"http://purl.org/au-research/grants/arc/LP140100567","code":"LP140100567","acronym":null,"title":"Linkage Projects - Grant ID: LP140100567","startdate":"2014-01-01","enddate":"2017-12-31","callidentifier":null,"keywords":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Projects","description":"Linkage Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","websiteurl":"http://purl.org/au-research/grants/arc/DP180101235","code":"DP180101235","acronym":null,"title":"Discovery Projects - Grant ID: DP180101235","startdate":"2018-01-01","enddate":"2023-12-31","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Discovery Projects","description":"Discovery Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","websiteurl":"http://purl.org/au-research/grants/arc/LE0989831","code":"LE0989831","acronym":null,"title":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape","startdate":"2009-01-01","enddate":"2009-12-31","callidentifier":null,"keywords":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","websiteurl":"http://repositorio.conicyt.cl/handle/10533/183109","code":"3120023","acronym":null,"title":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS","startdate":"2011-01-01","enddate":"2014-01-28","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::POSTDOCTORADO","description":"Fondecyt fundings - Fondecyt stream, POSTDOCTORADO"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","websiteurl":"http://repositorio.conicyt.cl/handle/10533/163340","code":"1040240","acronym":null,"title":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS.","startdate":"2004-01-15","enddate":"2007-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","websiteurl":"http://repositorio.conicyt.cl/handle/10533/162452","code":"1020683","acronym":null,"title":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION.","startdate":"2002-01-15","enddate":"2006-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|corda__h2020::bf5d35ec8d24ae4abfb4a1c6a0af3856","websiteurl":null,"code":"628405","acronym":"ANIM","title":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers","startdate":"2014-05-01","enddate":"2016-04-30","callidentifier":"FP7-PEOPLE-2013-IIF","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"EC","name":"European Commission","jurisdiction":"EU","funding_stream":{"id":"EC::FP7::SP3::PEOPLE","description":"SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions"}}],"summary":null,"granted":null,"h2020programme":[]}

View File

@ -0,0 +1,28 @@
40|nih_________::4c32cdbc4c9949853f02219fc4780a30
40|nih_________::b485512ef116af73bee79d50c8f9ca01
40|nih_________::b44d9bc8e99d9a0477ac06897e3e9c19
40|nih_________::7d2d2b7d1644a722a6bbcb031d82fec6
40|nsf_________::6b2674b0341e07b818a56c6f0daa2633
40|nih_________::96bb39aecc8f7b9f3b02ed36ef09538b
40|nsf_________::88d92bdf20ec2fac3ed9740f962b4fad
40|nih_________::4bb8c14729a0082378bb04db8321ce14
40|nih_________::08a8eed6c17c6d8e427afcfd29f87c7b
40|nsf_________::c314f3d35af1990121bf5b803937e112
40|nih_________::3ad6a2e6ebd561206f0da69468337f50
40|nih_________::d02c60c65a59629e69a30abcf2ceaed1
40|nih_________::d5a241cc94253feb72181cde15f51e96
40|nih_________::b5df718bbca69af50d4b7213e26af3f0
40|nih_________::bc90893c1be80503578e48f6ef6b7061
40|rcuk________::2c39b38c26c260b14a9816b88c91c132
40|nih_________::ab103ad117cd0579df66f7592a7d4adf
40|nih_________::147aa6ad8bd201e2a02c7b6cc3f68348
40|corda__h2020::bf5d35ec8d24ae4abfb4a1c6a0af3856
40|nih_________::b8083208156f2764d07c736ba9b49dd2
40|nih_________::f4d1e0aece0e6a9eff8d054c28e082db
40|nsf_________::56297da8b472a4be8ac3f09af813c9f6
40|nsf_________::6b6dc3398eeebb3de1ab66e6eb8c5cb3
40|nih_________::93289a36ebffb0bee3d6b01c6fc0a3d6
40|nih_________::6c3b00dd4ae9d43d6630ff18f189ebae
40|nih_________::1d983a87768f13bc8377b1b7d17290a2
40|nih_________::c3b56e91859b114644c1403e892eb80f
40|rcuk________::c1e15330fc7956063652f9c06e584548