Graph construction [PROD NEW] Data Provision 30 set the path of unresolved entities unresolvedEntityPath /data/unresolved_PROD/content set blacklist of funder nsPrefixes nsPrefixBlacklist conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ Set the path containing the PROD AGGREGATOR graph aggregatorGraphPath /tmp/prod_provision/graph/00_prod_graph_aggregator Set the target path to store the RAW graph rawGraphPath /tmp/prod_provision/graph/01_graph_raw Set the target path to store the consistent cleaned graph cleanedFirstGraphPath /tmp/prod_provision/graph/02_graph_cleaned Set the target path to store the RESOLVED graph resolvedGraphPath /tmp/beta_inference/graph/03_graph_resolved Set the target path to store the DEDUPED graph dedupGraphPath /tmp/prod_provision/graph/04_graph_dedup Set the target path to store the INFERRED graph inferredGraphPath /tmp/prod_provision/graph/05_graph_inferred Set the target path to store the CONSISTENCY graph consistentGraphPath /tmp/prod_provision/graph/06_graph_consistent Set the target path to store the ORCID enriched graph orcidGraphPath /tmp/prod_provision/graph/07_graph_orcid Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath /tmp/prod_provision/graph/08_graph_bulktagging Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath /tmp/prod_provision/graph/09_graph_affiliation Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath /tmp/prod_provision/graph/10_graph_comunity_organization Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath /tmp/prod_provision/graph/11_graph_funding Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath /tmp/prod_provision/graph/12_graph_comunity_sem_rel Set the target path to store the COUNTRY enriched graph countryGraphPath 
/tmp/prod_provision/graph/13_graph_country Set the target path to store the CLEANED graph cleanedGraphPath /tmp/prod_provision/graph/14_graph_cleaned Set the target path to store the blacklisted graph blacklistedGraphPath /tmp/prod_provision/graph/15_graph_blacklisted Set the map of paths for the Bulk Tagging bulkTaggingPathMap {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} Set the map of associations organization, community list for the propagation of community to result through organization propagationOrganizationCommunityMap {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1
439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], 
"20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} Set the dedup orchestrator name dedupConfig dedup-similarity-result-decisiontree-v2 declares the ActionSet ids to promote in the RAW graph actionSetIdsRawGraph scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite declares the ActionSet ids to promote in the INFERRED graph actionSetIdsIISGraph iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores Set the IS lookup service address isLookUpUrl http://services.openaire.eu:8280/is/services/isLookUp?wsdl wait configurations reuse cached ODF claims from the PROD aggregation system reuseODFClaims true reuse cached ODF records on HDFS from the PROD aggregation system reuseODFhdfs true reuse cached OAF claims from the PROD aggregation system reuseOAFClaims true reuse cached OAF records on HDFS from the PROD aggregation system reuseOAFhdfs true reuse cached DB content from the PROD aggregation system reuseDB true reuse cached OpenOrgs content from the PROD aggregation system reuseDBOpenorgs true reuse cached ODF content from the PROD aggregation system reuseODF true reuse cached OAF content from the PROD 
aggregation system reuseOAF true set the PROD aggregator content path contentPath /tmp/prod_aggregator wait configurations create the PROD AGGREGATOR graph executeOozieJob IIS { 'graphOutputPath' : 'aggregatorGraphPath', 'isLookupUrl' : 'isLookUpUrl', 'reuseODFClaims' : 'reuseODFClaims', 'reuseOAFClaims' : 'reuseOAFClaims', 'reuseDB' : 'reuseDB', 'reuseDBOpenorgs' : 'reuseDBOpenorgs', 'reuseODF' : 'reuseODF', 'reuseODF_hdfs' : 'reuseODFhdfs', 'reuseOAF' : 'reuseOAF', 'reuseOAF_hdfs' : 'reuseOAFhdfs', 'contentPath' : 'contentPath', 'nsPrefixBlacklist' : 'nsPrefixBlacklist' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', 'mongoURL' : '', 'mongoDb' : '', 'mdstoreManagerUrl' : '', 'postgresURL' : '', 'postgresUser' : '', 'postgresPassword' : '', 'postgresOpenOrgsURL' : '', 'postgresOpenOrgsUser' : '', 'postgresOpenOrgsPassword' : '', 'shouldHashId' : 'true', 'importOpenorgs' : 'true', 'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator' } build-report create the RAW graph executeOozieJob IIS { 'inputActionSetIds' : 'actionSetIdsRawGraph', 'inputGraphRootPath' : 'aggregatorGraphPath', 'outputGraphRootPath' : 'rawGraphPath', 'isLookupUrl' : 'isLookUpUrl' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', 'activePromoteDatasetActionPayload' : 'true', 'activePromoteDatasourceActionPayload' : 'true', 'activePromoteOrganizationActionPayload' : 'true', 'activePromoteOtherResearchProductActionPayload' : 'true', 'activePromoteProjectActionPayload' : 'true', 'activePromotePublicationActionPayload' : 'true', 'activePromoteRelationActionPayload' : 'true', 'activePromoteResultActionPayload' : 'true', 'activePromoteSoftwareActionPayload' : 'true', 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw' } build-report clean the properties in the graph typed as Qualifier according to the 
vocabulary indicated in schemeid executeOozieJob IIS { 'graphInputPath' : 'rawGraphPath', 'graphOutputPath': 'cleanedFirstGraphPath', 'isLookupUrl': 'isLookUpUrl' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/clean' } build-report updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI executeOozieJob IIS { 'sourcePath' : 'cleanedFirstGraphPath' } { 'resumeFrom' : 'prepareInfo', 'hostedByMapPath' : '/user/dnet.production/data/hostedByMap', 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap', 'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G' } build-report Graph resolution executeOozieJob IIS { 'graphBasePath':'cleanedFirstGraphPath', 'unresolvedPath' :'unresolvedEntityPath', 'targetPath':'resolvedGraphPath' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution', 'shouldResolveEntities' : 'false', 'sparkExecutorCores' : '4', 'sparkExecutorMemory' : '9G' } search for duplicates in the raw graph executeOozieJob IIS { 'actionSetId' : 'dedupConfig', 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', 'workingPath' : '/tmp/prod_provision/working_dir/dedup', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G' } build-report create the INFERRED graph executeOozieJob IIS { 'inputActionSetIds' : 'actionSetIdsIISGraph', 'inputGraphRootPath' : 'dedupGraphPath', 'outputGraphRootPath' : 'inferredGraphPath', 'isLookupUrl' : 'isLookUpUrl' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', 
'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', 'activePromoteDatasetActionPayload' : 'true', 'activePromoteDatasourceActionPayload' : 'true', 'activePromoteOrganizationActionPayload' : 'true', 'activePromoteOtherResearchProductActionPayload' : 'true', 'activePromoteProjectActionPayload' : 'true', 'activePromotePublicationActionPayload' : 'true', 'activePromoteRelationActionPayload' : 'true', 'activePromoteResultActionPayload' : 'true', 'activePromoteSoftwareActionPayload' : 'true', 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS' } build-report mark duplicates as deleted and redistribute the relationships executeOozieJob IIS { 'graphBasePath' : 'inferredGraphPath', 'graphOutputPath': 'consistentGraphPath' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', 'workingPath' : '/tmp/prod_provision/working_dir/dedup' } build-report propagates ORCID among results linked by allowedsemrels semantic relationships executeOozieJob IIS { 'sourcePath' : 'consistentGraphPath', 'outputPath': 'orcidGraphPath' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/orcid', 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', 'saveGraph' : 'true' } build-report mark results respecting some rules as belonging to communities executeOozieJob IIS { 'sourcePath' : 'orcidGraphPath', 'outputPath': 'bulkTaggingGraphPath', 'isLookUpUrl' : 'isLookUpUrl', 'pathMap' : 'bulkTaggingPathMap' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/bulktag' } build-report creates relationships between results and organizations when the organizations are associated with institutional repositories executeOozieJob IIS { 'sourcePath' : 'bulkTaggingGraphPath', 'outputPath': 'affiliationGraphPath' } { 'oozie.wf.application.path' : 
'/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/affiliation', 'saveGraph' : 'true', 'blacklist' : 'empty' } build-report marks as belonging to communities the results collected from datasources related to the organizations specified in the organizationCommunityMap executeOozieJob IIS { 'sourcePath' : 'affiliationGraphPath', 'outputPath': 'communityOrganizationGraphPath', 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/community_organization', 'saveGraph' : 'true' } build-report creates relations between projects and results linked to other results through allowedsemrels semantic relations linked to projects executeOozieJob IIS { 'sourcePath' : 'communityOrganizationGraphPath', 'outputPath': 'fundingGraphPath' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/funding', 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', 'saveGraph' : 'true' } build-report tags as belonging to communities the results in an allowedsemrels relation with other results already linked to communities executeOozieJob IIS { 'sourcePath' : 'fundingGraphPath', 'outputPath': 'communitySemRelGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/community_semrel', 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', 'saveGraph' : 'true' } build-report associates to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource they are collected from executeOozieJob IIS { 'sourcePath' : 'communitySemRelGraphPath', 'outputPath': 'countryGraphPath' } { 'oozie.wf.application.path' : 
'/lib/dnet/PROD/oa/enrichment/country/oozie_app', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', 'workingDir' : '/tmp/prod_provision/working_dir/country', 'allowedtypes' : 'pubsrepository::institutional', 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', 'saveGraph' : 'true' } build-report clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid executeOozieJob IIS { 'graphInputPath' : 'countryGraphPath', 'graphOutputPath': 'cleanedGraphPath', 'isLookupUrl': 'isLookUpUrl' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/clean' } build-report removes blacklisted relations executeOozieJob IIS { 'sourcePath' : 'cleanedGraphPath', 'outputPath': 'blacklistedGraphPath' } { 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app', 'workingDir' : '/tmp/prod_provision/working_dir/blacklist', 'postgresURL' : '', 'postgresUser' : '', 'postgresPassword' : '' } build-report wf_20210723_171026_279 2021-07-24T00:00:39+00:00 SUCCESS