Graph construction for IIS [BETA]
IIS
30
set blacklist of funder nsPrefixes from the beta aggregator
nsPrefixBlacklist_BETA
gsrt________,rcuk________,fct_________
set blacklist of funder nsPrefixes from the production aggregator
nsPrefixBlacklist_PROD
gsrt________,rcuk________
set the path of the map defining the relations id mappings
idMappingPath
/data/maps/fct_map.json
Set the target path to store the MERGED graph
mergedGraphPath
/tmp/beta_inference/graph/01_graph_merged
Set the target path to store the RAW graph
rawGraphPath
/tmp/beta_inference/graph/02_graph_raw
Set the target path to store the first CLEANED graph (cleaned before deduplication)
cleanedFirstGraphPath
/tmp/beta_inference/graph/03_graph_clean_first
Set the target path to store the DEDUPED graph
dedupGraphPath
/tmp/beta_inference/graph/04_graph_dedup
Set the target path to store the CONSISTENT graph
consistentGraphPath
/tmp/beta_inference/graph/05_graph_consistent
Set the target path to store the final CLEANED graph (cleaned after the consistency step)
cleanedGraphPath
/tmp/beta_inference/graph/06_graph_cleaned
Set the dedup orchestrator name
dedupConfig
dedup-similarity-result-decisiontree-v2
declares the ActionSet ids to promote in the RAW graph
actionSetIdsRawGraph
scholexplorer-dump,doiboost,orcidworks-no-doi,datacite
Set the IS lookup service address
isLookUpUrl
http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl
wait configurations
reuse cached ODF claims from the PROD aggregation system
reuseODFClaims_PROD
true
reuse cached ODF records on HDFS from the PROD aggregation system
reuseODFhdfs_PROD
true
reuse cached OAF claims from the PROD aggregation system
reuseOAFClaims_PROD
true
reuse cached OAF records on HDFS from the PROD aggregation system
reuseOAFhdfs_PROD
true
reuse cached DB content from the PROD aggregation system
reuseDB_PROD
true
reuse cached OpenOrgs content from the PROD aggregation system
reuseDBOpenorgs_PROD
true
reuse cached ODF content from the PROD aggregation system
reuseODF_PROD
true
reuse cached OAF content from the PROD aggregation system
reuseOAF_PROD
true
should apply the relations id patching based on the provided idMapping on PROD?
shouldPatchRelations_PROD
false
set the PROD aggregator content path
prodContentPath
/tmp/prod_aggregator_for_beta
Set the path containing the PROD AGGREGATOR graph
prodAggregatorGraphPath
/tmp/beta_inference/graph/00_prod_graph_aggregator
reuse cached ODF claims from the BETA aggregation system
reuseODFClaims_BETA
true
reuse cached ODF records on HDFS from the BETA aggregation system
reuseODFhdfs_BETA
true
reuse cached OAF claims from the BETA aggregation system
reuseOAFClaims_BETA
true
reuse cached OAF records on HDFS from the BETA aggregation system
reuseOAFhdfs_BETA
true
reuse cached DB content from the BETA aggregation system
reuseDB_BETA
true
reuse cached OpenOrgs content from the BETA aggregation system
reuseDBOpenorgs_BETA
true
reuse cached ODF content from the BETA aggregation system
reuseODF_BETA
true
reuse cached OAF content from the BETA aggregation system
reuseOAF_BETA
true
should apply the relations id patching based on the provided idMapping on BETA?
shouldPatchRelations_BETA
false
set the BETA aggregator content path
betaContentPath
/tmp/beta_aggregator
Set the path containing the BETA AGGREGATOR graph
betaAggregatorGraphPath
/tmp/beta_inference/graph/00_beta_graph_aggregator
wait configurations
create the BETA AGGREGATOR graph
executeOozieJob
IIS
{
'graphOutputPath' : 'betaAggregatorGraphPath',
'isLookupUrl' : 'isLookUpUrl',
'reuseODFClaims' : 'reuseODFClaims_BETA',
'reuseOAFClaims' : 'reuseOAFClaims_BETA',
'reuseDB' : 'reuseDB_BETA',
'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA',
'reuseODF' : 'reuseODF_BETA',
'reuseODF_hdfs' : 'reuseODFhdfs_BETA',
'reuseOAF' : 'reuseOAF_BETA',
'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA',
'contentPath' : 'betaContentPath',
'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA',
'shouldPatchRelations' : 'shouldPatchRelations_BETA',
'idMappingPath' : 'idMappingPath'
}
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
'mongoURL' : '',
'mongoDb' : '',
'mdstoreManagerUrl' : '',
'postgresURL' : '',
'postgresUser' : '',
'postgresPassword' : '',
'postgresOpenOrgsURL' : '',
'postgresOpenOrgsUser' : '',
'postgresOpenOrgsPassword' : '',
'shouldHashId' : 'true',
'importOpenorgs' : 'true',
'workingDir' : '/tmp/beta_inference/working_dir/beta_aggregator'
}
build-report
create the PROD AGGREGATOR graph
executeOozieJob
IIS
{
'graphOutputPath' : 'prodAggregatorGraphPath',
'isLookupUrl' : 'isLookUpUrl',
'reuseODFClaims' : 'reuseODFClaims_PROD',
'reuseOAFClaims' : 'reuseOAFClaims_PROD',
'reuseDB' : 'reuseDB_PROD',
'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD',
'reuseODF' : 'reuseODF_PROD',
'reuseODF_hdfs' : 'reuseODFhdfs_PROD',
'reuseOAF' : 'reuseOAF_PROD',
'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD',
'contentPath' : 'prodContentPath',
'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD',
'shouldPatchRelations' : 'shouldPatchRelations_PROD',
'idMappingPath' : 'idMappingPath'
}
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
'mongoURL' : '',
'mongoDb' : '',
'mdstoreManagerUrl' : '',
'postgresURL' : '',
'postgresUser' : '',
'postgresPassword' : '',
'postgresOpenOrgsURL' : '',
'postgresOpenOrgsUser' : '',
'postgresOpenOrgsPassword' : '',
'shouldHashId' : 'true',
'importOpenorgs' : 'true',
'workingDir' : '/tmp/beta_inference/working_dir/prod_aggregator'
}
build-report
wait configurations
create the MERGED graph from the BETA and PROD AGGREGATOR graphs
executeOozieJob
IIS
{
'betaInputGraphPath' : 'betaAggregatorGraphPath',
'prodInputGraphPath' : 'prodAggregatorGraphPath',
'graphOutputPath' : 'mergedGraphPath'
}
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app',
'workingDir' : '/tmp/beta_inference/working_dir/merge_graph',
'priority' : 'BETA'
}
build-report
create the RAW graph
executeOozieJob
IIS
{
'inputActionSetIds' : 'actionSetIdsRawGraph',
'inputGraphRootPath' : 'mergedGraphPath',
'outputGraphRootPath' : 'rawGraphPath',
'isLookupUrl' : 'isLookUpUrl'
}
{
'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G',
'activePromoteDatasetActionPayload' : 'true',
'activePromoteDatasourceActionPayload' : 'true',
'activePromoteOrganizationActionPayload' : 'true',
'activePromoteOtherResearchProductActionPayload' : 'true',
'activePromoteProjectActionPayload' : 'true',
'activePromotePublicationActionPayload' : 'true',
'activePromoteRelationActionPayload' : 'true',
'activePromoteResultActionPayload' : 'true',
'activePromoteSoftwareActionPayload' : 'true',
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
'workingDir' : '/tmp/beta_inference/working_dir/promoteActionsRaw'
}
build-report
clean the properties in the RAW graph typed as Qualifier according to the vocabulary indicated in schemeid
executeOozieJob
IIS
{
'graphInputPath' : 'rawGraphPath',
'graphOutputPath': 'cleanedFirstGraphPath',
'isLookupUrl': 'isLookUpUrl'
}
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
'workingDir' : '/tmp/beta_inference/working_dir/clean_first'
}
build-report
search for duplicates in the first cleaned graph
executeOozieJob
IIS
{
'actionSetId' : 'dedupConfig',
'graphBasePath' : 'cleanedFirstGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app',
'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
'workingPath' : '/tmp/beta_inference/working_dir/dedup',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G'
}
build-report
mark duplicates as deleted and redistribute the relationships
executeOozieJob
IIS
{
'graphBasePath' : 'dedupGraphPath',
'graphOutputPath': 'consistentGraphPath'
}
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app',
'workingPath' : '/tmp/beta_inference/working_dir/dedup'
}
build-report
clean the properties in the CONSISTENT graph typed as Qualifier according to the vocabulary indicated in schemeid
executeOozieJob
IIS
{
'graphInputPath' : 'consistentGraphPath',
'graphOutputPath': 'cleanedGraphPath',
'isLookupUrl': 'isLookUpUrl'
}
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
'workingDir' : '/tmp/beta_inference/working_dir/clean'
}
build-report
wf_20210730_094240_462
2021-07-30T15:04:19+00:00
SUCCESS