From 6d0b11252e665f186223d104e0b14d94432f0f4a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 11 May 2020 17:32:06 +0200 Subject: [PATCH 01/31] bulktagging wfs moved into common dhp-enrichment module --- .../dhp/blacklist/oozie_app/workflow.xml | 61 ++-- .../dhp-bulktag/project-default.properties | 7 - .../{dhp-bulktag => dhp-enrichment}/pom.xml | 23 +- .../eu/dnetlib/dhp/PropagationConstant.java | 0 .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 16 +- .../dhp/bulktag}/community/Community.java | 23 +- .../community/CommunityConfiguration.java | 24 +- .../CommunityConfigurationFactory.java | 42 ++- .../dhp/bulktag}/community/Constraint.java | 8 +- .../dhp/bulktag}/community/Constraints.java | 16 +- .../dnetlib/dhp/bulktag}/community/Pair.java | 6 +- .../dhp/bulktag}/community/ProtoMap.java | 2 +- .../dhp/bulktag/community/Provider.java} | 14 +- .../community/QueryInformationSystem.java | 10 +- .../dhp/bulktag}/community/ResultTagger.java | 23 +- .../community/SelectionConstraints.java | 11 +- .../bulktag/community/TaggingConstants.java} | 10 +- .../bulktag}/community/ZenodoCommunity.java | 9 +- .../dhp/bulktag/criteria}/ContainsVerb.java | 2 +- .../criteria}/ContainsVerbIgnoreCase.java | 2 +- .../dhp/bulktag/criteria}/EqualVerb.java | 2 +- .../criteria}/EqualVerbIgnoreCase.java | 2 +- .../bulktag/criteria}/InterfaceAdapter.java | 6 +- .../bulktag/criteria}/NotContainsVerb.java | 2 +- .../criteria}/NotContainsVerbIgnoreCase.java | 2 +- .../dhp/bulktag/criteria}/NotEqualVerb.java | 2 +- .../criteria}/NotEqualVerbIgnoreCase.java | 2 +- .../dhp/bulktag/criteria}/Selection.java | 2 +- .../dhp/bulktag/criteria}/VerbClass.java | 2 +- .../dhp/bulktag/criteria}/VerbResolver.java | 12 +- .../criteria}/VerbResolverFactory.java | 2 +- .../dhp/countrypropagation/CountrySbs.java | 0 .../countrypropagation/DatasourceCountry.java | 0 .../PrepareDatasourceCountryAssociation.java | 0 .../PrepareResultCountrySet.java | 0 .../countrypropagation/ResultCountrySet.java | 0 
.../SparkCountryPropagationJob.java | 0 .../AutoritativeAuthor.java | 0 .../PrepareResultOrcidAssociationStep1.java | 0 .../PrepareResultOrcidAssociationStep2.java | 0 .../ResultOrcidList.java | 0 .../SparkOrcidToResultFromSemRelJob.java | 0 .../PrepareProjectResultsAssociation.java | 0 .../dhp/projecttoresult/ResultProjectSet.java | 0 .../SparkResultToProjectThroughSemRelJob.java | 0 .../OrganizationMap.java | 0 .../PrepareResultCommunitySet.java | 0 .../ResultCommunityList.java | 0 .../ResultOrganizations.java | 0 ...kResultToCommunityFromOrganizationJob.java | 0 .../PrepareResultCommunitySetStep1.java | 0 .../PrepareResultCommunitySetStep2.java | 0 ...parkResultToCommunityThroughSemRelJob.java | 0 .../DatasourceOrganization.java | 0 .../PrepareResultInstRepoAssociation.java | 0 .../ResultOrganizationSet.java | 0 ...arkResultToOrganizationFromIstRepoJob.java | 0 .../dhp/bulktag/input_bulkTag_parameters.json | 0 .../dhp/bulktag/oozie_app/config-default.xml | 0 .../dhp/bulktag/oozie_app/workflow.xml | 0 .../input_countrypropagation_parameters.json | 0 .../input_prepareassoc_parameters.json | 0 ...input_prepareresultcountry_parameters.json | 0 .../oozie_app/config-default.xml | 0 .../countrypropagation/oozie_app/workflow.xml | 0 .../input_orcidtoresult_parameters.json | 0 ...input_prepareorcidtoresult_parameters.json | 0 ...nput_prepareorcidtoresult_parameters2.json | 0 .../oozie_app/config-default.xml | 0 .../oozie_app/workflow.xml | 0 ...put_prepareprojecttoresult_parameters.json | 0 .../input_projecttoresult_parameters.json | 0 .../oozie_app/config-default.xml | 0 .../projecttoresult/oozie_app/workflow.xml | 0 .../input_communitytoresult_parameters.json | 0 ...t_preparecommunitytoresult_parameters.json | 0 .../oozie_app/config-default.xml | 0 .../oozie_app/workflow.xml | 0 .../input_communitytoresult_parameters.json | 0 ..._preparecommunitytoresult2_parameters.json | 0 ...t_preparecommunitytoresult_parameters.json | 0 .../oozie_app/config-default.xml | 0 
.../oozie_app/workflow.xml | 0 .../input_prepareresultorg_parameters.json | 0 ...sulaffiliationfrominstrepo_parameters.json | 0 .../oozie_app/config-default.xml | 0 .../oozie_app/workflow.xml | 0 .../dnetlib/dhp/bulktag}/BulkTagJobTest.java | 326 +++++++----------- .../CommunityConfigurationFactoryTest.java | 22 +- .../CountryPropagationJobTest.java | 88 ++--- .../OrcidPropagationJobTest.java | 2 - .../ProjectPropagationJobTest.java | 153 ++++---- .../ResultToCommunityJobTest.java | 44 +-- .../ResultToCommunityJobTest.java | 33 +- .../ResultToOrganizationJobTest.java} | 163 ++++----- .../community_configuration.json | 0 .../community_configuration.xml | 28 +- .../community_configuration_selcrit.json | 0 .../community_configuration_selcrit.xml | 32 +- .../communityconfiguration/tagging_conf.json | 0 .../communityconfiguration/tagging_conf.xml | 62 ++-- .../dataset/no_updates/dataset_10.json.gz | Bin .../dataset_10.json.gz | Bin .../contextnoprovenance/dataset_10.json.gz | Bin .../nocontext/dataset_10.json.gz | Bin .../dataset_10.json.gz | Bin .../dataset_10.json.gz | Bin 0 -> 6968 bytes .../otherresearchproduct_10.json.gz | Bin .../update_datasource/publication_10.json.gz | Bin .../sample/software/software_10.json.gz | Bin .../preparedInfo/preparedInfo.json.gz | Bin .../sample/software/software_10.json.gz | Bin .../mergedOrcidAssoc/mergedOrcid_17.json.gz | Bin .../sample/noupdate/dataset_10.json.gz | Bin .../sample/oneupdate/dataset_10.json.gz | Bin .../sample/twoupdates/dataset_10.json.gz | Bin .../alreadyLinked/alreadyLinked.json.gz | Bin .../potentialUpdates/potentialUpdates.json.gz | Bin .../potentialUpdates/potentialUpdates.json.gz | Bin .../potentialUpdates/potentialUpdates.json.gz | Bin .../preparedInfo/resultCommunityList.json.gz | Bin .../sample/dataset_10.json.gz | Bin .../mergedResultCommunityList.json.gz | Bin .../sample/dataset_10.json.gz | Bin .../alreadyLinked/alreadyLinked_20.json.gz | Bin .../datasourceOrganization_28.json.gz | Bin 
.../noupdate_updatenomix/software_10.json.gz | Bin .../sample/updatemix/software_10.json.gz | Bin .../alreadyLinked/alreadyLinked_20.json.gz | Bin .../datasourceOrganization_28.json.gz | Bin .../alreadyLinked/alreadyLinked_20.json.gz | Bin .../datasourceOrganization_28.json.gz | Bin dhp-workflows/dhp-propagation/pom.xml | 43 --- .../eu/dnetlib/dhp/wf/profiles/provision.xml | 2 +- dhp-workflows/pom.xml | 3 +- 135 files changed, 550 insertions(+), 796 deletions(-) delete mode 100644 dhp-workflows/dhp-bulktag/project-default.properties rename dhp-workflows/{dhp-bulktag => dhp-enrichment}/pom.xml (81%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/PropagationConstant.java (100%) rename dhp-workflows/{dhp-bulktag => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java (97%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/Community.java (79%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/CommunityConfiguration.java (96%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/CommunityConfigurationFactory.java (86%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/Constraint.java (86%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/Constraints.java (94%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/Pair.java (92%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/ProtoMap.java (80%) rename 
dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Datasource.java => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java} (86%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/QueryInformationSystem.java (98%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/ResultTagger.java (94%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/SelectionConstraints.java (91%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/TagginConstants.java => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java} (66%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag}/community/ZenodoCommunity.java (95%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/ContainsVerb.java (91%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/ContainsVerbIgnoreCase.java (92%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/EqualVerb.java (91%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/EqualVerbIgnoreCase.java (91%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/InterfaceAdapter.java (96%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => 
dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/NotContainsVerb.java (91%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/NotContainsVerbIgnoreCase.java (92%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/NotEqualVerb.java (91%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/NotEqualVerbIgnoreCase.java (92%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/Selection.java (60%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/VerbClass.java (86%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/VerbResolver.java (97%) rename dhp-workflows/{dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria => dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria}/VerbResolverFactory.java (73%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java (100%) 
rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/projecttoresult/ResultProjectSet.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/OrganizationMap.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultCommunityList.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultOrganizations.java (100%) rename dhp-workflows/{dhp-propagation => 
dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/DatasourceOrganization.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java (100%) rename dhp-workflows/{dhp-bulktag => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json (100%) rename dhp-workflows/{dhp-bulktag => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-bulktag => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json (100%) rename dhp-workflows/{dhp-propagation => 
dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json (100%) rename dhp-workflows/{dhp-propagation => 
dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml (100%) rename 
dhp-workflows/{dhp-bulktag/src/test/java/eu/dnetlib/dhp => dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag}/BulkTagJobTest.java (71%) rename dhp-workflows/{dhp-bulktag/src/test/java/eu/dnetlib/dhp => dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag}/CommunityConfigurationFactoryTest.java (93%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java (81%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java (98%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java (62%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java (88%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java (88%) rename dhp-workflows/{dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/Result2OrganizationJobTest.java => dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java} (61%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/communityconfiguration/community_configuration.json (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/communityconfiguration/community_configuration.xml (95%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/communityconfiguration/community_configuration_selcrit.json (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => 
dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/communityconfiguration/community_configuration_selcrit.xml (95%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/communityconfiguration/tagging_conf.json (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/communityconfiguration/tagging_conf.xml (98%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/sample/dataset/no_updates/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/sample/dataset/update_datasourcewithconstraints/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/sample/dataset/update_subject/contextnoprovenance/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/sample/dataset/update_subject/nocontext/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject}/update_subject_datasource/dataset_10.json.gz (100%) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json.gz rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/sample/otherresearchproduct/update_zenodocommunity/otherresearchproduct_10.json.gz (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => 
dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/sample/publication/update_datasource/publication_10.json.gz (100%) rename dhp-workflows/{dhp-bulktag/src/test/resources/eu/dnetlib/dhp => dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag}/sample/software/software_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/preparedInfo.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/countrypropagation/sample/software/software_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc/mergedOrcid_17.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/twoupdates/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked/alreadyLinked.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates/potentialUpdates.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates/potentialUpdates.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates/potentialUpdates.json.gz (100%) rename dhp-workflows/{dhp-propagation => 
dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo/mergedResultCommunityList.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample/dataset_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix/software_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix/software_10.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz (100%) rename dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz (100%) rename 
dhp-workflows/{dhp-propagation => dhp-enrichment}/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz (100%) delete mode 100644 dhp-workflows/dhp-propagation/pom.xml diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml index 855cac65e..f71c085b2 100644 --- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml @@ -1,31 +1,38 @@ - - - postgresURL - the url of the postgress server to query - - - postgresUser - the username to access the postgres db - - - postgresPassword - the postgres password - - - sourcePath - the source path - - - + + + postgresURL + the url of the postgress server to query + + + postgresUser + the username to access the postgres db + + + postgresPassword + the postgres password + + + sourcePath + the source path + + + outputPath + the graph output path + + - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + - + + + @@ -87,12 +94,14 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation + --outputPath${outputPath}/relation --hdfsPath${workingDir}/blacklist --mergesPath${workingDir}/mergesRelation - + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-bulktag/project-default.properties b/dhp-workflows/dhp-bulktag/project-default.properties deleted file mode 100644 index 84a56f19f..000000000 --- a/dhp-workflows/dhp-bulktag/project-default.properties +++ /dev/null @@ -1,7 +0,0 @@ -#sandboxName when not provided explicitly will be generated -sandboxName=${sandboxName} 
-sandboxDir=/user/${dhp.hadoop.frontend.user.name}/${sandboxName} -workingDir=${sandboxDir}/working_dir -oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir} -oozieTopWfApplicationPath = ${oozie.wf.application.path} - diff --git a/dhp-workflows/dhp-bulktag/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml similarity index 81% rename from dhp-workflows/dhp-bulktag/pom.xml rename to dhp-workflows/dhp-enrichment/pom.xml index 7c2afa0cc..fe9833e3e 100644 --- a/dhp-workflows/dhp-bulktag/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -9,7 +9,7 @@ 4.0.0 - dhp-bulktag + dhp-enrichment @@ -31,6 +31,12 @@ dhp-schemas ${project.version} + + org.apache.spark + spark-hive_2.11 + test + + dom4j dom4j @@ -43,23 +49,16 @@ com.jayway.jsonpath json-path - - org.reflections - reflections - 0.9.11 - compile - - - com.google.guava - guava - 23.3-jre - + io.github.classgraph classgraph 4.8.71 + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/PropagationConstant.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java similarity index 97% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index e62b4b4fc..e3d74ef3e 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.bulktag; 
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.util.Optional; - +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bulktag.community.*; +import eu.dnetlib.dhp.schema.oaf.Result; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -15,12 +16,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; +import java.util.Optional; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.community.*; -import eu.dnetlib.dhp.schema.oaf.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; public class SparkBulkTagJob { diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java similarity index 79% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Community.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index a73ff4d3e..d492b848e 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -1,15 +1,14 @@ -package eu.dnetlib.dhp.community; +package eu.dnetlib.dhp.bulktag.community; + +import com.google.gson.Gson; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import com.google.gson.Gson; - /** Created by miriam on 
01/08/2018. */ public class Community implements Serializable { @@ -17,7 +16,7 @@ public class Community implements Serializable { private String id; private List subjects = new ArrayList<>(); - private List datasources = new ArrayList<>(); + private List providers = new ArrayList<>(); private List zenodoCommunities = new ArrayList<>(); public String toJson() { @@ -27,7 +26,7 @@ public class Community implements Serializable { public boolean isValid() { return !getSubjects().isEmpty() - || !getDatasources().isEmpty() + || !getProviders().isEmpty() || !getZenodoCommunities().isEmpty(); } @@ -47,12 +46,12 @@ public class Community implements Serializable { this.subjects = subjects; } - public List getDatasources() { - return datasources; + public List getProviders() { + return providers; } - public void setDatasources(List datasources) { - this.datasources = datasources; + public void setProviders(List providers) { + this.providers = providers; } public List getZenodoCommunities() { diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java similarity index 96% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/CommunityConfiguration.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index c5bbb66eb..4e5b9fc9f 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -1,5 +1,14 @@ -package eu.dnetlib.dhp.community; +package eu.dnetlib.dhp.bulktag.community; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; 
+import eu.dnetlib.dhp.bulktag.criteria.Selection; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import java.io.Serializable; import java.util.ArrayList; @@ -8,17 +17,6 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; - -import eu.dnetlib.dhp.selectioncriteria.InterfaceAdapter; -import eu.dnetlib.dhp.selectioncriteria.Selection; - /** Created by miriam on 02/08/2018. */ public class CommunityConfiguration implements Serializable { @@ -84,7 +82,7 @@ public class CommunityConfiguration implements Serializable { add(sbj.toLowerCase().trim(), p, subjectMap); } // get datasources - for (Datasource d : c.getDatasources()) { + for (Provider d : c.getProviders()) { add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap); } diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java similarity index 86% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/CommunityConfigurationFactory.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 508f0663d..dc83497c3 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -1,11 +1,14 @@ -package eu.dnetlib.dhp.community; - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; +package 
eu.dnetlib.dhp.bulktag.community; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; +import eu.dnetlib.dhp.bulktag.criteria.Selection; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -14,15 +17,10 @@ import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; - -import eu.dnetlib.dhp.selectioncriteria.InterfaceAdapter; -import eu.dnetlib.dhp.selectioncriteria.Selection; -import eu.dnetlib.dhp.selectioncriteria.VerbResolver; -import eu.dnetlib.dhp.selectioncriteria.VerbResolverFactory; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; /** Created by miriam on 03/08/2018. 
*/ public class CommunityConfigurationFactory { @@ -77,7 +75,7 @@ public class CommunityConfigurationFactory { log.info(String.format("community id: %s", c.getId())); c.setSubjects(parseSubjects(node)); - c.setDatasources(parseDatasources(node)); + c.setProviders(parseDatasources(node)); c.setZenodoCommunities(parseZenodoCommunities(node)); return c; } @@ -96,17 +94,17 @@ public class CommunityConfigurationFactory { return subjects; } - private static List parseDatasources(final Node node) { + private static List parseDatasources(final Node node) { final List list = node.selectNodes("./datasources/datasource"); - final List datasourceList = new ArrayList<>(); + final List providerList = new ArrayList<>(); for (Node n : list) { - Datasource d = new Datasource(); + Provider d = new Provider(); d.setOpenaireId(n.selectSingleNode("./openaireId").getText()); d.setSelCriteria(n.selectSingleNode("./selcriteria"), resolver); - datasourceList.add(d); + providerList.add(d); } - log.info("size of the datasource list " + datasourceList.size()); - return datasourceList; + log.info("size of the datasource list " + providerList.size()); + return providerList; } private static List parseZenodoCommunities(final Node node) { diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java similarity index 86% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Constraint.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index 54f381d4a..8e28a7a5f 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -1,12 +1,12 @@ -package eu.dnetlib.dhp.community; +package eu.dnetlib.dhp.bulktag.community; + +import eu.dnetlib.dhp.bulktag.criteria.Selection; 
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import java.io.Serializable; import java.lang.reflect.InvocationTargetException; -import eu.dnetlib.dhp.selectioncriteria.Selection; -import eu.dnetlib.dhp.selectioncriteria.VerbResolver; - public class Constraint implements Serializable { private String verb; private String field; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Constraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java similarity index 94% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Constraints.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java index af095c513..eace3bc35 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Constraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java @@ -1,5 +1,11 @@ -package eu.dnetlib.dhp.community; +package eu.dnetlib.dhp.bulktag.community; + +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import java.io.Serializable; import java.lang.reflect.InvocationTargetException; @@ -8,14 +14,6 @@ import java.util.Collection; import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import com.google.gson.Gson; -import com.google.gson.reflect.TypeToken; - -import eu.dnetlib.dhp.selectioncriteria.VerbResolver; - /** Created by miriam on 02/08/2018. 
*/ public class Constraints implements Serializable { private static final Log log = LogFactory.getLog(Constraints.class); diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Pair.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java similarity index 92% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Pair.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java index 01cd3ce22..1130a0770 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Pair.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java @@ -1,10 +1,10 @@ -package eu.dnetlib.dhp.community; - -import java.io.Serializable; +package eu.dnetlib.dhp.bulktag.community; import com.google.gson.Gson; +import java.io.Serializable; + /** Created by miriam on 03/08/2018. */ public class Pair implements Serializable { private A fst; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ProtoMap.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java similarity index 80% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ProtoMap.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java index d48dce2c6..fd7481719 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ProtoMap.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ProtoMap.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.community; +package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; import java.util.HashMap; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Datasource.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java similarity index 86% rename from 
dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Datasource.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java index a3d343087..c4362610e 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/Datasource.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java @@ -1,19 +1,17 @@ -package eu.dnetlib.dhp.community; - -import java.io.Serializable; +package eu.dnetlib.dhp.bulktag.community; +import com.google.gson.Gson; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.Node; -import com.google.gson.Gson; - -import eu.dnetlib.dhp.selectioncriteria.VerbResolver; +import java.io.Serializable; /** Created by miriam on 01/08/2018. */ -public class Datasource implements Serializable { - private static final Log log = LogFactory.getLog(Datasource.class); +public class Provider implements Serializable { + private static final Log log = LogFactory.getLog(Provider.class); private String openaireId; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java similarity index 98% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/QueryInformationSystem.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 2c18392c7..43eb40940 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -1,15 +1,13 @@ -package eu.dnetlib.dhp.community; - -import java.util.List; - -import org.dom4j.DocumentException; +package eu.dnetlib.dhp.bulktag.community; 
import com.google.common.base.Joiner; - import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import org.dom4j.DocumentException; + +import java.util.List; public class QueryInformationSystem { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java similarity index 94% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ResultTagger.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index eb531c6b1..fd4f5497a 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -1,20 +1,19 @@ -package eu.dnetlib.dhp.community; +package eu.dnetlib.dhp.bulktag.community; -import static eu.dnetlib.dhp.community.TagginConstants.*; +import com.google.gson.Gson; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; +import eu.dnetlib.dhp.schema.oaf.*; +import org.apache.commons.lang3.StringUtils; import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.apache.commons.lang3.StringUtils; - -import com.google.gson.Gson; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; - -import eu.dnetlib.dhp.schema.oaf.*; +import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; /** Created by miriam on 02/08/2018. 
*/ public class ResultTagger implements Serializable { @@ -51,7 +50,7 @@ public class ResultTagger implements Serializable { } public R enrichContextCriteria( - final R result, final CommunityConfiguration conf, final Map criteria) { + final R result, final CommunityConfiguration conf, final Map criteria) { // } // public Result enrichContextCriteria(final Result result, final CommunityConfiguration @@ -239,8 +238,8 @@ public class ResultTagger implements Serializable { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); - pa.setSchemeid(DNET_SCHEMA_ID); - pa.setSchemename(DNET_SCHEMA_NAME); + pa.setSchemeid(DNET_PROVENANCE_ACTIONS); + pa.setSchemename(DNET_PROVENANCE_ACTIONS); return pa; } } diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java similarity index 91% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/SelectionConstraints.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 802e2f5d6..28674d9ef 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -1,5 +1,9 @@ -package eu.dnetlib.dhp.community; +package eu.dnetlib.dhp.bulktag.community; + +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import java.io.Serializable; import java.lang.reflect.Type; @@ -7,11 +11,6 @@ import java.util.Collection; import java.util.List; import java.util.Map; -import com.google.gson.Gson; -import com.google.gson.reflect.TypeToken; - -import eu.dnetlib.dhp.selectioncriteria.VerbResolver; - public class SelectionConstraints implements 
Serializable { private List criteria; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/TagginConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java similarity index 66% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/TagginConstants.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index 92d37d089..3cdc7c941 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/TagginConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -1,20 +1,14 @@ -package eu.dnetlib.dhp.community; +package eu.dnetlib.dhp.bulktag.community; -public class TagginConstants { +public class TaggingConstants { public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging"; - public static final String DNET_SCHEMA_NAME = "dnet:provenanceActions"; - public static final String DNET_SCHEMA_ID = "dnet:provenanceActions"; - public static final String CLASS_ID_SUBJECT = "community:subject"; public static final String CLASS_ID_DATASOURCE = "community:datasource"; public static final String CLASS_ID_CZENODO = "community:zenodocommunity"; - public static final String SCHEMA_ID = "dnet:provenanceActions"; - public static final String COUNTER_GROUP = "Bulk Tagging"; - public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/"; public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject"; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ZenodoCommunity.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java similarity index 95% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ZenodoCommunity.java rename to 
dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java index e1492f6a5..eb0577ffc 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/community/ZenodoCommunity.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java @@ -1,11 +1,10 @@ -package eu.dnetlib.dhp.community; - -import java.io.Serializable; - -import org.dom4j.Node; +package eu.dnetlib.dhp.bulktag.community; import com.google.gson.Gson; +import org.dom4j.Node; + +import java.io.Serializable; /** Created by miriam on 01/08/2018. */ public class ZenodoCommunity implements Serializable { diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/ContainsVerb.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerb.java similarity index 91% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/ContainsVerb.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerb.java index a6ef2d908..496630fa3 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/ContainsVerb.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerb.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/ContainsVerbIgnoreCase.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java similarity index 92% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/ContainsVerbIgnoreCase.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java index b8b0262e9..a4a6f5663 100644 --- 
a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/ContainsVerbIgnoreCase.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/ContainsVerbIgnoreCase.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/EqualVerb.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerb.java similarity index 91% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/EqualVerb.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerb.java index 3f17a6bb3..b9088d012 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/EqualVerb.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerb.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/EqualVerbIgnoreCase.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java similarity index 91% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/EqualVerbIgnoreCase.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java index 934406859..c5f0ce070 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/EqualVerbIgnoreCase.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/EqualVerbIgnoreCase.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; diff --git 
a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/InterfaceAdapter.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java similarity index 96% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/InterfaceAdapter.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java index 9ef3bd60c..2bc1ab3cf 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/InterfaceAdapter.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java @@ -1,10 +1,10 @@ -package eu.dnetlib.dhp.selectioncriteria; - -import java.lang.reflect.Type; +package eu.dnetlib.dhp.bulktag.criteria; import com.google.gson.*; +import java.lang.reflect.Type; + public class InterfaceAdapter implements JsonSerializer, JsonDeserializer { private static final String CLASSNAME = "CLASSNAME"; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotContainsVerb.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerb.java similarity index 91% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotContainsVerb.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerb.java index eb83b256e..03ec9804b 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotContainsVerb.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerb.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotContainsVerbIgnoreCase.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java similarity index 92% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotContainsVerbIgnoreCase.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java index fab3efef3..b21be83f0 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotContainsVerbIgnoreCase.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotContainsVerbIgnoreCase.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotEqualVerb.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerb.java similarity index 91% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotEqualVerb.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerb.java index 2311c2987..86bf00012 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotEqualVerb.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerb.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotEqualVerbIgnoreCase.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java similarity index 92% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotEqualVerbIgnoreCase.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java 
index de2f682a5..c6958a641 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/NotEqualVerbIgnoreCase.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/NotEqualVerbIgnoreCase.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.io.Serializable; diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/Selection.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java similarity index 60% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/Selection.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java index b488bda01..ec9fb716d 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/Selection.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; public interface Selection { diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbClass.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbClass.java similarity index 86% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbClass.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbClass.java index d467f934f..5b35919bd 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbClass.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbClass.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; diff --git 
a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java similarity index 97% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbResolver.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 6a8ceebc3..19d6c4615 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -1,16 +1,16 @@ -package eu.dnetlib.dhp.selectioncriteria; - -import java.io.Serializable; -import java.lang.reflect.InvocationTargetException; -import java.util.Map; -import java.util.stream.Collectors; +package eu.dnetlib.dhp.bulktag.criteria; import io.github.classgraph.ClassGraph; import io.github.classgraph.ClassInfo; import io.github.classgraph.ClassInfoList; import io.github.classgraph.ScanResult; +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.util.Map; +import java.util.stream.Collectors; + public class VerbResolver implements Serializable { private Map> map = null; // = new HashMap<>(); private final ClassGraph classgraph = new ClassGraph(); diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbResolverFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java similarity index 73% rename from dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbResolverFactory.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java index 58bf60d42..0bb801999 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/selectioncriteria/VerbResolverFactory.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.selectioncriteria; +package eu.dnetlib.dhp.bulktag.criteria; public class VerbResolverFactory { diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/CountrySbs.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/DatasourceCountry.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/ResultCountrySet.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/AutoritativeAuthor.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/ResultOrcidList.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/projecttoresult/ResultProjectSet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/ResultProjectSet.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/projecttoresult/ResultProjectSet.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/ResultProjectSet.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/OrganizationMap.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/OrganizationMap.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/OrganizationMap.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/OrganizationMap.java diff --git 
a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultCommunityList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultCommunityList.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultCommunityList.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultCommunityList.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultOrganizations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultOrganizations.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultOrganizations.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultOrganizations.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java diff --git 
a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/DatasourceOrganization.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/DatasourceOrganization.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/DatasourceOrganization.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/DatasourceOrganization.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultOrganizationSet.java diff --git a/dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java diff --git a/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json similarity index 100% rename from dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json diff --git a/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml similarity index 100% rename from dhp-workflows/dhp-bulktag/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json rename to 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml rename to 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json diff --git 
a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml similarity index 100% rename from dhp-workflows/dhp-propagation/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java similarity index 71% rename from dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/BulkTagJobTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 75ecb0298..f20678f7b 100644 --- a/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -1,13 +1,13 @@ -package eu.dnetlib.dhp; - -import static eu.dnetlib.dhp.community.TagginConstants.ZENODO_COMMUNITY_INDICATOR; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +package eu.dnetlib.dhp.bulktag; +import com.fasterxml.jackson.databind.ObjectMapper; +import 
eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Software; import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -18,37 +18,44 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import org.mortbay.util.IO; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; -import eu.dnetlib.dhp.bulktag.SparkBulkTagJob; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Software; +import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR; public class BulkTagJobTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final ClassLoader cl = eu.dnetlib.dhp.BulkTagJobTest.class.getClassLoader(); + public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp"; + + public static final String pathMap = + "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}"; private static SparkSession spark; private static Path workingDir; - private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.BulkTagJobTest.class); + + private static final Logger log = LoggerFactory.getLogger(BulkTagJobTest.class); 
private static String taggingConf = ""; static { try { - taggingConf = IO + taggingConf = IOUtils .toString( BulkTagJobTest.class .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml")); + "/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml")); } catch (IOException e) { e.printStackTrace(); } @@ -56,11 +63,11 @@ public class BulkTagJobTest { @BeforeAll public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(eu.dnetlib.dhp.BulkTagJobTest.class.getSimpleName()); + workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName()); log.info("using work dir {}", workingDir); SparkConf conf = new SparkConf(); - conf.setAppName(eu.dnetlib.dhp.BulkTagJobTest.class.getSimpleName()); + conf.setAppName(BulkTagJobTest.class.getSimpleName()); conf.setMaster("local[*]"); conf.set("spark.driver.host", "localhost"); @@ -84,34 +91,21 @@ public class BulkTagJobTest { @Test public void noUpdatesTest() throws Exception { + final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/sample/dataset/no_updates").getPath(), - "-taggingConf", - taggingConf, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", - workingDir.toString() + "/dataset", - "-isLookUpUrl", - "http://beta.services.openaire.eu:8280/is/services/isLookUp", - "-pathMap", - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}" - // "-preparedInfoPath", - // getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo").getPath() + "-isTest", Boolean.TRUE.toString(), + 
"-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", workingDir.toString() + "/dataset", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-pathMap", pathMap }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/dataset") @@ -134,34 +128,24 @@ public class BulkTagJobTest { @Test public void bulktagBySubjectNoPreviousContextTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") + .getPath(); + final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource("/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext") - .getPath(), - "-taggingConf", - taggingConf, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", - workingDir.toString() + "/dataset", - "-isLookUpUrl", - "http://beta.services.openaire.eu:8280/is/services/isLookUp", - "-pathMap", - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}" + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", workingDir.toString() + "/dataset", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + 
"-pathMap", pathMap }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/dataset") @@ -240,32 +224,22 @@ public class BulkTagJobTest { @Test public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { + final String sourcePath = getClass() + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") + .getPath(); + final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance") - .getPath(), - "-taggingConf", - taggingConf, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", - workingDir.toString() + "/dataset", - "-isLookUpUrl", - "http://beta.services.openaire.eu:8280/is/services/isLookUp", - "-pathMap", - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}" + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", workingDir.toString() + "/dataset", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-pathMap", pathMap }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); @@ -332,34 +306,23 @@ public class BulkTagJobTest { @Test public void bulktagByDatasourceTest() throws Exception { + final String sourcePath = getClass() + 
.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") + .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource("/eu/dnetlib/dhp/sample/publication/update_datasource") - .getPath(), - "-taggingConf", - taggingConf, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Publication", - "-outputPath", - workingDir.toString() + "/publication", - "-isLookUpUrl", - "http://beta.services.openaire.eu:8280/is/services/isLookUp", - "-pathMap", - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}" + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-outputPath", workingDir.toString() + "/publication", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-pathMap", pathMap }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/publication") @@ -415,35 +378,24 @@ public class BulkTagJobTest { @Test public void bulktagByZenodoCommunityTest() throws Exception { + final String sourcePath = getClass() + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") + .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity") - .getPath(), - 
"-taggingConf", - taggingConf, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", - workingDir.toString() + "/orp", - "-isLookUpUrl", - "http://beta.services.openaire.eu:8280/is/services/isLookUp", - "-pathMap", - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}" + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", + "-outputPath", workingDir.toString() + "/orp", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-pathMap", pathMap }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/orp") @@ -548,34 +500,23 @@ public class BulkTagJobTest { @Test public void bulktagBySubjectDatasourceTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") + .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource("/eu/dnetlib/dhp/sample/dataset/update_subject_datasource") - .getPath(), - "-taggingConf", - taggingConf, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", - workingDir.toString() + "/dataset", - "-isLookUpUrl", - "http://beta.services.openaire.eu:8280/is/services/isLookUp", - "-pathMap", - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : 
\"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}" + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", workingDir.toString() + "/dataset", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-pathMap", pathMap }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/dataset") @@ -691,29 +632,17 @@ public class BulkTagJobTest { SparkBulkTagJob .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/sample/software/").getPath(), - "-taggingConf", - taggingConf, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", - workingDir.toString() + "/software", - "-isLookUpUrl", - "http://beta.services.openaire.eu:8280/is/services/isLookUp", - "-pathMap", - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}" + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(), + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", + "-outputPath", workingDir.toString() + "/software", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-pathMap", pathMap }); - final JavaSparkContext sc = new 
JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/software") @@ -796,35 +725,24 @@ public class BulkTagJobTest { @Test public void bulktagDatasourcewithConstraintsTest() throws Exception { + final String sourcePath = getClass() + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") + .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints") - .getPath(), - "-taggingConf", - taggingConf, - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", - workingDir.toString() + "/dataset", - "-isLookUpUrl", - "http://beta.services.openaire.eu:8280/is/services/isLookUp", - "-pathMap", - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}" + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", workingDir.toString() + "/dataset", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-pathMap", pathMap }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/dataset") diff --git a/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/CommunityConfigurationFactoryTest.java 
b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java similarity index 93% rename from dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/CommunityConfigurationFactoryTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index 3aae9ebee..056c3345c 100644 --- a/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -1,23 +1,21 @@ -package eu.dnetlib.dhp; - -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.util.*; +package eu.dnetlib.dhp.bulktag; +import com.google.gson.Gson; +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; +import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory; +import eu.dnetlib.dhp.bulktag.community.Constraint; +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import com.google.gson.Gson; - -import eu.dnetlib.dhp.community.CommunityConfiguration; -import eu.dnetlib.dhp.community.CommunityConfigurationFactory; -import eu.dnetlib.dhp.community.Constraint; -import eu.dnetlib.dhp.community.SelectionConstraints; -import eu.dnetlib.dhp.selectioncriteria.VerbResolver; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.util.*; /** Created by miriam on 03/08/2018. 
*/ public class CommunityConfigurationFactoryTest { diff --git a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java similarity index 81% rename from dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index 2370d5e6c..b62238089 100644 --- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -5,12 +5,15 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; @@ -26,12 +29,11 @@ import eu.dnetlib.dhp.schema.oaf.Software; import scala.Tuple2; public class CountryPropagationJobTest { + private static final Logger log = LoggerFactory.getLogger(CountryPropagationJobTest.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final ClassLoader cl = CountryPropagationJobTest.class.getClassLoader(); - private static SparkSession spark; private static Path workingDir; @@ -101,8 +103,8 @@ public class CountryPropagationJobTest { Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count()); Dataset countryExploded = 
verificationDs - .flatMap(row -> row.getCountry().iterator(), Encoders.bean(Country.class)) - .map(c -> c.getClassid(), Encoders.STRING()); + .flatMap((FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) + .map((MapFunction) c -> c.getClassid(), Encoders.STRING()); Assertions.assertEquals(9, countryExploded.count()); @@ -115,20 +117,18 @@ public class CountryPropagationJobTest { Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count()); Dataset> countryExplodedWithCountryclassid = verificationDs - .flatMap( - row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() - .forEach( + .flatMap((FlatMapFunction>) row -> { + List> prova = new ArrayList(); + List country_list = row.getCountry(); + country_list + .stream() + .forEach( c -> prova .add( new Tuple2<>( row.getId(), c.getClassid()))); - return prova.iterator(); - }, - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + return prova.iterator(); + }, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); Assertions.assertEquals(9, countryExplodedWithCountryclassid.count()); @@ -178,20 +178,20 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassname = verificationDs .flatMap( - row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() - .forEach( - c -> prova - .add( - new Tuple2<>( - row.getId(), - c.getClassname()))); - return prova.iterator(); - }, - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + (FlatMapFunction>) row -> { + List> prova = new ArrayList(); + List country_list = row.getCountry(); + country_list + .stream() + .forEach( + c -> prova + .add( + new Tuple2<>( + row.getId(), + c.getClassname()))); + return prova.iterator(); + }, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); countryExplodedWithCountryclassname.show(false); Assertions @@ -239,22 +239,22 @@ public class CountryPropagationJobTest { 
Dataset> countryExplodedWithCountryProvenance = verificationDs .flatMap( - row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() - .forEach( - c -> prova - .add( - new Tuple2<>( - row.getId(), - c - .getDataInfo() - .getInferenceprovenance()))); - return prova.iterator(); - }, - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + (FlatMapFunction>) row -> { + List> prova = new ArrayList(); + List country_list = row.getCountry(); + country_list + .stream() + .forEach( + c -> prova + .add( + new Tuple2<>( + row.getId(), + c + .getDataInfo() + .getInferenceprovenance()))); + return prova.iterator(); + }, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); Assertions .assertEquals( diff --git a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java similarity index 98% rename from dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index 0b0ec62d1..edd2e7ba7 100644 --- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -29,8 +29,6 @@ public class OrcidPropagationJobTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final ClassLoader cl = OrcidPropagationJobTest.class.getClassLoader(); - private static SparkSession spark; private static Path workingDir; diff --git a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java 
b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java similarity index 62% rename from dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index 7ed26b6b2..ff63753b8 100644 --- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; @@ -29,8 +30,6 @@ public class ProjectPropagationJobTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final ClassLoader cl = ProjectPropagationJobTest.class.getClassLoader(); - private static SparkSession spark; private static Path workingDir; @@ -72,34 +71,26 @@ public class ProjectPropagationJobTest { @Test public void NoUpdateTest() throws Exception { - SparkResultToProjectThroughSemRelJob - .main( + final String potentialUpdateDate = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates") + .getPath(); + final String alreadyLinkedPath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob.main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - // "-sourcePath", - // 
getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(), - "-hive_metastore_uris", - "", - "-saveGraph", - "true", - "-outputPath", - workingDir.toString() + "/relation", - "-potentialUpdatePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates") - .getPath(), - "-alreadyLinkedPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(), + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-hive_metastore_uris", "", + "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", + "-potentialUpdatePath", potentialUpdateDate, + "-alreadyLinkedPath", alreadyLinkedPath, }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/relation") @@ -115,34 +106,26 @@ public class ProjectPropagationJobTest { */ @Test public void UpdateTenTest() throws Exception { - SparkResultToProjectThroughSemRelJob - .main( + final String potentialUpdatePath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") + .getPath(); + final String alreadyLinkedPath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob.main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - // "-sourcePath", - // getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(), - "-hive_metastore_uris", - "", - "-saveGraph", - "true", - "-outputPath", - workingDir.toString() + "/relation", - "-potentialUpdatePath", - getClass() - .getResource( - 
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") - .getPath(), - "-alreadyLinkedPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(), + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-hive_metastore_uris", "", + "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", + "-potentialUpdatePath", potentialUpdatePath, + "-alreadyLinkedPath", alreadyLinkedPath, }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/relation") @@ -160,18 +143,18 @@ public class ProjectPropagationJobTest { .assertEquals( 5, verificationDs - .filter( - r -> r.getSource().substring(0, 2).equals("50") - && r.getTarget().substring(0, 2).equals("40") + .filter((FilterFunction) r -> + r.getSource().startsWith("50") + && r.getTarget().startsWith("40") && r.getRelClass().equals("isProducedBy")) .count()); Assertions .assertEquals( 5, verificationDs - .filter( - r -> r.getSource().substring(0, 2).equals("40") - && r.getTarget().substring(0, 2).equals("50") + .filter((FilterFunction) r -> + r.getSource().startsWith("40") + && r.getTarget().startsWith("50") && r.getRelClass().equals("produces")) .count()); @@ -194,34 +177,26 @@ public class ProjectPropagationJobTest { */ @Test public void UpdateMixTest() throws Exception { - SparkResultToProjectThroughSemRelJob - .main( + final String potentialUpdatepath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") + .getPath(); + final String alreadyLinkedPath = getClass() + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob.main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - 
Boolean.FALSE.toString(), - // "-sourcePath", - // getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/relation").getPath(), - "-hive_metastore_uris", - "", - "-saveGraph", - "true", - "-outputPath", - workingDir.toString() + "/relation", - "-potentialUpdatePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") - .getPath(), - "-alreadyLinkedPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(), + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-hive_metastore_uris", "", + "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", + "-potentialUpdatePath", potentialUpdatepath, + "-alreadyLinkedPath", alreadyLinkedPath, }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/relation") @@ -242,18 +217,18 @@ public class ProjectPropagationJobTest { .assertEquals( 4, verificationDs - .filter( - r -> r.getSource().substring(0, 2).equals("50") - && r.getTarget().substring(0, 2).equals("40") + .filter((FilterFunction) r -> + r.getSource().startsWith("50") + && r.getTarget().startsWith("40") && r.getRelClass().equals("isProducedBy")) .count()); Assertions .assertEquals( 4, verificationDs - .filter( - r -> r.getSource().substring(0, 2).equals("40") - && r.getTarget().substring(0, 2).equals("50") + .filter((FilterFunction) r -> + r.getSource().startsWith("40") + && r.getTarget().startsWith("50") && r.getRelClass().equals("produces")) .count()); diff --git a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java 
similarity index 88% rename from dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index ba8fb0831..20b20d4ed 100644 --- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -32,8 +32,6 @@ public class ResultToCommunityJobTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final ClassLoader cl = ResultToCommunityJobTest.class.getClassLoader(); - private static SparkSession spark; private static Path workingDir; @@ -68,33 +66,24 @@ public class ResultToCommunityJobTest { @Test public void testSparkResultToCommunityFromOrganizationJob() throws Exception { - SparkResultToCommunityFromOrganizationJob - .main( + final String preparedInfoPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") + .getPath(); + SparkResultToCommunityFromOrganizationJob.main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", getClass() .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/sample") .getPath(), - "-hive_metastore_uris", - "", - "-saveGraph", - "true", - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", - workingDir.toString() + "/dataset", - "-preparedInfoPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") - .getPath() + "-hive_metastore_uris", "", + "-saveGraph", "true", + "-resultTableName", 
"eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", workingDir.toString() + "/dataset", + "-preparedInfoPath", preparedInfoPath }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/dataset") @@ -217,13 +206,6 @@ public class ResultToCommunityJobTest { .get(0) .getString(0)); - /* - * {"communityList":["euromarine","mes"],"resultId":"50|doajarticles::8d817039a63710fcf97e30f14662c6c8"} - * "context" ["id": euromarine] updates = 1 - * {"communityList":["euromarine","mes"],"resultId":"50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"} context - * = [ni, euromarine] updates = 1 - */ - query = "select id, MyT.id community " + "from dataset " + "lateral view explode(context) c as MyT " diff --git a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java similarity index 88% rename from dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index 13941b4a3..a8e1ab841 100644 --- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -29,32 +29,21 @@ import eu.dnetlib.dhp.schema.oaf.Dataset; public class ResultToCommunityJobTest { - private static final Logger log = LoggerFactory - .getLogger( - eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class); + private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class); 
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final ClassLoader cl = eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class - .getClassLoader(); - private static SparkSession spark; private static Path workingDir; @BeforeAll public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory( - eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class - .getSimpleName()); + workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName()); log.info("using work dir {}", workingDir); SparkConf conf = new SparkConf(); - conf - .setAppName( - eu.dnetlib.dhp.resulttocommunityfromsemrel.ResultToCommunityJobTest.class - .getSimpleName()); + conf.setAppName(ResultToCommunityJobTest.class.getSimpleName()); conf.setMaster("local[*]"); conf.set("spark.driver.host", "localhost"); @@ -65,7 +54,7 @@ public class ResultToCommunityJobTest { spark = SparkSession .builder() - .appName(OrcidPropagationJobTest.class.getSimpleName()) + .appName(ResultToCommunityJobTest.class.getSimpleName()) .config(conf) .getOrCreate(); } @@ -83,22 +72,18 @@ public class ResultToCommunityJobTest { new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample") + "-sourcePath", getClass() + .getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample") .getPath(), "-hive_metastore_uris", "", "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/dataset", - "-preparedInfoPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo") + "-preparedInfoPath", getClass() + .getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo") .getPath() }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = 
JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/dataset") diff --git a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/Result2OrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java similarity index 61% rename from dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/Result2OrganizationJobTest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index e7adb260e..30be118d1 100644 --- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/Result2OrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -23,23 +23,19 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Relation; -public class Result2OrganizationJobTest { +public class ResultToOrganizationJobTest { - private static final Logger log = LoggerFactory.getLogger(Result2OrganizationJobTest.class); + private static final Logger log = LoggerFactory.getLogger(ResultToOrganizationJobTest.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final ClassLoader cl = Result2OrganizationJobTest.class.getClassLoader(); - private static SparkSession spark; private static Path workingDir; @BeforeAll public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory( - SparkResultToOrganizationFromIstRepoJob.class.getSimpleName()); + workingDir = Files.createTempDirectory(SparkResultToOrganizationFromIstRepoJob.class.getSimpleName()); log.info("using work dir {}", workingDir); SparkConf conf = new SparkConf(); @@ -72,40 +68,29 @@ public class 
Result2OrganizationJobTest { */ @Test public void NoUpdateTest() throws Exception { - SparkResultToOrganizationFromIstRepoJob - .main( + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") + .getPath(); + final String datasourceOrganizationPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization") + .getPath(); + final String alreadyLinkedPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob.main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") - .getPath(), - "-hive_metastore_uris", - "", - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Software", - - "-saveGraph", - "true", - "-outputPath", - workingDir.toString() + "/relation", - "-datasourceOrganizationPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization") - .getPath(), - "-alreadyLinkedPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked") - .getPath(), + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-hive_metastore_uris", "", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", + "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", + "-datasourceOrganizationPath", datasourceOrganizationPath, + "-alreadyLinkedPath", alreadyLinkedPath, }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); 
JavaRDD tmp = sc .textFile(workingDir.toString() + "/relation") @@ -123,40 +108,29 @@ public class Result2OrganizationJobTest { */ @Test public void UpdateNoMixTest() throws Exception { - SparkResultToOrganizationFromIstRepoJob - .main( + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") + .getPath(); + final String datasourceOrganizationPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization") + .getPath(); + final String alreadyLinkedPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob.main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") - .getPath(), - "-hive_metastore_uris", - "", - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Software", - - "-saveGraph", - "true", - "-outputPath", - workingDir.toString() + "/relation", - "-datasourceOrganizationPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization") - .getPath(), - "-alreadyLinkedPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked") - .getPath(), + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-hive_metastore_uris", "", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", + "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", + "-datasourceOrganizationPath", datasourceOrganizationPath, + "-alreadyLinkedPath", alreadyLinkedPath, }); - final JavaSparkContext sc = new 
JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/relation") @@ -197,40 +171,29 @@ public class Result2OrganizationJobTest { @Test public void UpdateMixTest() throws Exception { - SparkResultToOrganizationFromIstRepoJob - .main( + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") + .getPath(); + final String datasourceOrganizationPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization") + .getPath(); + final String alreadyLinkedPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob.main( new String[] { - "-isTest", - Boolean.TRUE.toString(), - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-sourcePath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") - .getPath(), - "-hive_metastore_uris", - "", - "-resultTableName", - "eu.dnetlib.dhp.schema.oaf.Software", - - "-saveGraph", - "true", - "-outputPath", - workingDir.toString() + "/relation", - "-datasourceOrganizationPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization") - .getPath(), - "-alreadyLinkedPath", - getClass() - .getResource( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked") - .getPath(), + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-hive_metastore_uris", "", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", + "-saveGraph", "true", + "-outputPath", workingDir.toString() + "/relation", + "-datasourceOrganizationPath", datasourceOrganizationPath, + 
"-alreadyLinkedPath", alreadyLinkedPath, }); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc .textFile(workingDir.toString() + "/relation") diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration.json similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration.json rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration.json diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration.xml b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration.xml similarity index 95% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration.xml rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration.xml index 8fec18593..e2cc41063 100644 --- a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration.xml +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration.xml @@ -2,17 +2,17 @@ - + - + - + @@ -35,7 +35,7 @@ SDG9 - Industry innovation and infrastructure SDG16 - Peace justice and strong institutions - + 123 @@ -45,12 +45,12 @@ - + - + @@ -74,7 +74,7 @@ brain magnetic resonance imaging brain abnormalities - + re3data_____::5b9bf9171d92df854cf3c520692e9122 @@ -95,7 +95,7 @@ doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a - + @@ -106,12 +106,12 @@ aqua sea 
- + re3data_____::9633d1e8c4309c833c2c442abeb0cfeb - + @@ -134,7 +134,7 @@ food distribution forestry - + opendoar____::1a551829d50f1400b0dab21fdd969c04 @@ -159,18 +159,18 @@ opendoar____::87ae6fb631f7c8a627e8e28785d9992d - + oac_clarin - + re3data_____::a507cdacc5bbcc08761c92185dee5cab - + \ No newline at end of file diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.json similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.json rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.json diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.xml b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml similarity index 95% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.xml rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml index ad31e1763..89ace0e5f 100644 --- a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.xml +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml @@ -2,17 +2,17 @@ - + - + - + @@ -35,7 +35,7 @@ SDG9 - Industry innovation and infrastructure SDG16 - Peace justice and strong institutions - + 123 @@ -45,12 +45,12 @@ - + - + @@ -74,7 +74,7 @@ brain magnetic resonance imaging brain abnormalities - + 
re3data_____::5b9bf9171d92df854cf3c520692e9122 @@ -95,7 +95,7 @@ doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a - + @@ -106,12 +106,12 @@ aqua sea - + re3data_____::9633d1e8c4309c833c2c442abeb0cfeb - + @@ -134,7 +134,7 @@ food distribution forestry - + opendoar____::1a551829d50f1400b0dab21fdd969c04 @@ -159,30 +159,30 @@ opendoar____::87ae6fb631f7c8a627e8e28785d9992d - + oac_clarin - + re3data_____::a507cdacc5bbcc08761c92185dee5cab - + oaa_dariah - + openaire____::1cfdb2e14977f31a98e0118283401f32 {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} - + dimpo diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.json similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf.json rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.json diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml similarity index 98% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml index 4f0d25f34..ec52d4799 100644 --- a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml @@ -1,7 +1,7 @@ - + zenodo @@ -11,27 +11,27 @@ Result: 2 - + Result: 3 - + Result: 4 - + re3data_____::a507cdacc5bbcc08761c92185dee5cab - + Result: 5 - + rda @@ -59,7 +59,7 @@ 
SDG9 - Industry innovation and infrastructure SDG16 - Peace justice and strong institutions - + Result: 7 @@ -132,7 +132,7 @@ architectural vessels - + re3data_____::9ebe127e5f3a0bf401875690f3bb6b81 @@ -169,7 +169,7 @@ opendoar____::97275a23ca44226c9964043c8462be96 - + storm @@ -275,7 +275,7 @@ Fishing fleet Aquaculture - + doajarticles::8cec81178926caaca531afbd8eb5d64c @@ -328,7 +328,7 @@ doajarticles::dd70e44479f0ade25aa106aef3e87a0a - + discardless @@ -393,7 +393,7 @@ brain magnetic resonance imaging brain abnormalities - + re3data_____::5b9bf9171d92df854cf3c520692e9122 @@ -418,7 +418,7 @@ rest________::fb1a3d4523c95e63496e3bc7ba36244b - + neuroinformatics @@ -494,7 +494,7 @@ aqua sea - + adriplan @@ -688,7 +688,7 @@ Result: 11 - + instruct @@ -702,7 +702,7 @@ Result: 12 - + Result: 13 @@ -726,7 +726,7 @@ food distribution forestry - + opendoar____::1a551829d50f1400b0dab21fdd969c04 @@ -751,7 +751,7 @@ opendoar____::87ae6fb631f7c8a627e8e28785d9992d - + edenis @@ -829,7 +829,7 @@ Result: 14 - + opendoar____::7e7757b1e12abcb736ab9a754ffb617a {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} @@ -838,7 +838,7 @@ opendoar____::96da2f590cd7246bbde0051047b0d6f7 {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} - + dimpo @@ -848,12 +848,12 @@ Result: 15 - + Result: 16 - + Result: 17 @@ -1009,7 +1009,7 @@ Innovative freight delivery systems urban freight delivery infrastructures - + doajarticles::1c5bdf8fca58937894ad1441cca99b76 @@ -1094,7 +1094,7 @@ doajarticles::fba6191177ede7c51ea1cdf58eae7f8b - + jsdtl @@ -1156,17 +1156,17 @@ Result: 18 - + Result: 19 - + Result: 20 - + Result: 21 @@ -1179,7 +1179,7 @@ science policy Policy and Law - + doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d @@ -1264,7 +1264,7 @@ doajarticles::7ffc35ac5133da01d421ccf8af5b70bc - + risis @@ -1287,7 +1287,7 @@ mesh:COVID-19 COVID2019 - + opendoar____::358aee4cc897452c00244351e4d91f69 
{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, @@ -1366,7 +1366,7 @@ re3data_____::978378def740bbf2bfb420de868c460b {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCov"}]}]} - + chicago-covid-19 diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/no_updates/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/no_updates/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/dataset_10.json.gz diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/dataset_10.json.gz diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance/dataset_10.json.gz rename 
to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/dataset_10.json.gz diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/dataset_10.json.gz diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject_datasource/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/update_subject_datasource/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject_datasource/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/update_subject_datasource/dataset_10.json.gz diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/dataset_10.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..fdc76a04c8b66fa7d14e03e25f67e7715b7e5843 GIT binary patch literal 6968 zcmV-88^`1yiwFoNN1$E+17u-zVRL14UokK)YIARH0PS7fj^j3#zMrQc3=A*>xLlHK z`R}e$UCCC^oiw_82FOki0!pN9RwU7(q)IB81~$*IPtccr(HDch%dKzo6#FE54oO+E z>%XTeQWEYqGG&`Thvz%|Js%H$>@ix<(`+Go^B;R6I~Tc#65g9HXp-|je6!$6akl+} z@q`zgMHl)Pp2crL?`tK`c}8{09{f;$rg0%;h84#N&2s^@^zw^b 
zq-&{)d8}lfe@ByV=8CUnE()nGAiGYQ(UjjMaUQRD%5}jkpCadjVr13*sOM z{Fxtl(JW%4#c&u;`D`2ps4c0)Qe-qag#!N3yJz-)f>vab@ECwuD49YLKlZ-i7a)sV z^NdNVu(bIc%Y(|!X;NZipYZkO1L}%Af!Yh25PtBto(6U%brTf|f`kqDiGnyn+F?n!IQyQ;ipL`AlaH!KJs6OVU zx+V+!2#9_IgO=^h*tMa)#X=;)Vyt_6f2ZbGsyq`(BFU#*Wt+Dd<3C{K`Q{yYLDS8L ztSLjzfB6Z&mcRUz?Fh3|3WZ;xn5{s;y#&%NB}WNZH>lw#qqP9$LtSe#>9)4vEwzFD zDxQ}~fdkFnAO_gxyx1Sew%_h|T2l^=STd1Iam}E1IhCGnvSmq^djjx0IRwMn6X4~V zW+Go*AwX5i)fLyjzQHM^%nBHKbO+AM=nO`Roe}@|%fFvKIq+xKkMVnA?aH$+fg8c^ zK7#ja6fVSyGLfg!!O$iQ=R$$*8D$QJR$Xcq;yN@^K`aOMrkDZl_*{LOosljkzoye~vda|cYD zz>J5!H}Hq#m0LcwdPH4_0#(yEa5_nd$e4h^3(){zNG#z$JTDY2_)^@k-$<#*h$(Tm zQ9D2EHvrK(76t!$H$hYSy(A9ekLhx!O;ibtg)}1|2jmhtXT;)r!WB zX;v`p(7|J5)^zwZdiF~WkOQ(mP}`*w@>O-NN#MVQ5>a`Bu$&$P(BR+VY{VfycT;+HmM~-PK5@}r&$2`Gd!l8IiS0Rm|O-1Ftt8lB_UY_rC zdiQ~^w=N?X=~g(38K)CZBdV5?XUy~!^KC9m72BSJ{;q<5U)WFD3v2$4Wv07FVV#5( zHqST{^eK(V0lDPFcL3cwbQ$Fvm!E+nhNMb+;85)m`hKBP4JWAj-V7kRN#vRtlmp2C>Dr+Mo8Lyu_X~G3PyerL-M3*!-GN8G{9-tL%@tT6lZ9s_g`J_3z|B` zx1o3#k_1B+JbnYi4st1y7ksO49H>N=+j$#6Nkk0mr=2<6@=;>NLF{vmErU8W9mS^3 z52K5NEI!EC;b`C;x|!r2Q~k~q(Ukei=O=~~n1(~2E+RJNQOL%lxc4y(#eT0;334Z`d9PS9Ska6nd;sDeUZQj1 zLf7syunmcfE0$aD5}WdY$&_x1{TNGZC9yZaa2>B;Y0YVhbf#*Rw*PBfDs28m^}EtT zC}%YTEC(TB?blqMGkz}Oda*8k*#51zo)v%0Foi27pkWMGzf|Aa3|W~*^`>BnXTy~l z{^Or-{`8~6|Dnj);pkd&cF3mjB=kq)U@?oLDGMTRJPXHc5yfFViF&t?vv14(E0VMC z!SkKW?#c5FhO@`y`5JlJ=J^J}q<4wuJ2~+E13!G_kLST`9)^$0^L>G5ThBM|6tZle z!KB$>w^ko(eyd5%fB8u@*tECNdmX>^YJO`hmCWe5fRPfU8~}Bi`v#axAkY?tRG@B|n!r(R~CRQwjXf2^51L4Qk^2 zrrzV+nO%hZ?(WQcn?j$&Pi0Fj#=}P&=xPD_9X%(kMI&$l;GwnbHiA z0io-bSIMA(q5#!E39yLdM1rdFfjgmjK*w@;;EfLg&*`)qj^&4OiaIw%Hx0+RDXwr+ z?4HN@Gz|S|Fk_*&@JF5(d5lJZzhG=Op2gni(cKhy8r5Ytg&VM)o5Hy%9?(rO^JbH4 z-4u)IY&0IlBN~rl_-etuc;b(S!^oe6RL7^d(M@q%_HMc<#`kqogrmpwQj9y{rI>nG zMk7Hl#R#L3488g2k)x5^xLfU$W7rhNg6fSVY^SgXoa?lo!oCfrW4)bBNPVDXi{0jX zx|2!#@{`W>)c~?Iv(=7}s|CVfB$fc|TqFO9Yhi9GV&VEtR~%S%r;!=g zr||b%xRK-IqfejI_f4F4>o;T%clU060&7Kp{)J4rlKpzDWm0K_kTGmwIGW5o&F2oD 
zufBEsL{Pr8<>n;Wyw5njV&tzpK`V;xrkpATTTJU@E>M_t(53iT61NkN;hwzdo4f%F=Ry75mv{m0kNuZ z@H(@)1{ouENf^Bm)Zy{A-=4#f*o|_{=_{M}q_pkD)std_XVR z7!M@^#|`g@m#oI-p^royyuaq82XX4pX3;EUJ`2a=Y3S3%gbm{`yu}fDYxWL0BE!Kr9NyCrIT=5m zCvq!K`<_VuMo*+an-8bX6X`sW5BLbdeF1ex%`LiUkn8PSGOYvgUxF#nnnR*>B;2^v z84zGM>q76g3&aVox|K|XyIb0cF2SwuDu=$Imst)J$E0xZ+{&bQy6c@aTa#5|(S>f( zSf*=U2urqTbh18ytmN}j`>YdkEb|+Hb!3Vzn#A476gx2Qk||*83gs_&4u2X4|7X<& z;d?se@!Sin3TzDT>5`l83L|A=MeU_qUEj?_38;TvZ|v}UXen!#vR-HT>dPdWtZraF0@NH5lmj4|ZS%bJRTxyy`_lx7_AX|6@im!-mM)6eh| z7#yC~X?f?Qaqd)|Y8|2z;m{i%ZdEfF+!j>YxpW;1;y0wzUx@|rpNv=zO-Fig*gDcf zS4a=OBR%}iI+i0n{DJYmmGm&0UVHmG;KRjm6!V!E2Ht}5Vd&8b_x;h7O<1h&<=sSj zxHWtCNDsr&VCDz+B0Y=-fj@mL(nBpz`=kdO^5RO;!*D(t&I7+y(!<|j1cELfI1lt{$njSs}MVnB3!ZW;YE8GC(C=3psJ5U%-cb&oq#Ln+T%b=q) zUyzLBD|zSIE1QrqudbfQc12cSv%h(j>_`kW^E7@ptECdfI4AlSTZ#$d?q7U`V) z1&^=(N^%*gUDLYI;m5hP%DB`e=2ft8S{dCy`}gy!9sJY7gdRjue2{DZ{_9M$F`jJM z7)+Ss5N+O1X(ScZCqdZGYez8K2W~l&$#kc&p)BIf2lI~o<}Jwup6}@Z50^Dr>knh9 z?{FcIWwE#V(~JvDn3t&*t5TkcAK&`|553f-iV&_Wr7FgTWrZ@)eYkM8J zLf0sxGp+-(qsRr!yr~XC(TK-x!cti5bSfP-esAd5qkABOMP#v9(@3M}sQAMi%wgjZ$r?XNo8VLVh zvr!Bcg@VM%J{LvvQz$75f#zpZg18EMTLDc5#Wb= zP*hb8pr_>p0Ru?Q$U7&6GK`s+LgS$y7{G2ejIESSjnm9>Z?tvUr7If51=Ja@pr~_> z@k+oxY<|#vW>G*H8~Q@A*Cl5Le`U0>oeEH}0X|A#Ohg}?%7%UD!6CQZ0+EHTZn$)P zYg4xH6-BUg%M?lZnN)q7%luB!#6b6UqN2ub>|jxFl?o^vD`>X&bx*7RIT{9vw%BLEy8#2C|U9kcZ40S)6fvCr&85z9mmw^joWIrd_79!idu4Q?5HhAFJzas4m_B`!-%`cAaOctr7Klhfm)lUrXlao9XQHe=r(4g4Y5E~*?H-C&IA z-5s$zx`U7FK#mC3MXc{m1Z&zre;d7q-iPURK~C~D$ECSuA3)A}`&6AIT@bs5e`(k7 z#^J3wOUQZP&8c)ikd1JX87_x}6u2X{hZ9>_Gfna_EF7MYTr4wz5A+yUAXoygcMQmU zgybt4;WIm$nP%yanyq7E+8`05%u9+Pa5Nv{=1HXrLz8IEKumefRm03r@2ov!0cb(D zUjJj6@Fi;Nxf16TA0$HGCgXT|C(JuW|Mx%td%StC;?mF=jzRfY%?T@G&6^>A(X##( z6B4PGG^^M{NcqT+b;ZQCLapef!UVR#%ruySJ~!`DeAw4s<~`msKmXlr{yi!ICR^rMFx z=eB!5HqaDZp~2AU)Ukp7kZ3wK(C-&p#|G*I8wj1gU1kHhDbTTj92@B2*gz96oCepi zfhJ+FU|tkN5siIL{WuOij^B;OJQ|O&7v97Mx;1+z*+AjgzZV-Q98CQ2BeH?4JngfA zroEfkK;dK_j9p+VM=-i0!RR@@Np9{ZIiE|rQC?!Xl8!~xcIYoWymYUkv<*ZRd?3Gk 
z@6rL&fniGIsf+5;DiANY^=+LCg?=uFMyumhy^KPK7%O9kNq1N*m*O6sqMPl!^8Ugm z4#RBl%*2^#bgDW;4XalM+964ATdq0QOBZdUOW}{uabEB^tHuqgi7H@-V0XwT^%^7j#XpgtU%*HxU+JpX*0B*@+<1Zem}vu zFVDojo-jxeM%Xcq@JdTc^AL*7yI3fC8dU(g{? z){x%v8br1dktMIxFHKP$&L)=`Z3HOFuG21(2{Kcx4aS3RM~N-S%W9O@4WJ;>_oAW? ztSA;_;|Vo{$G^@^Oqsn9PPRfDW2Is>?P{}6K;s-4DVlq5doU_%o=8#<7+)zv1A%%^ zGbpqu3wt!PgV-lRrki)n;F6I4lKEPn>-hmD{t_lN91bt`Yk!Wfe>DJoMwt?ZmD)!IiSwevM_K`NV`8nJ6rO#-j1c?2j zj0DJf#uu6_0!syGIxSQc66R_xW6U6JxlH#Mz%iGaJkmD?tu0CSRuCuFgb_@ZmuI}t zErSmj2S}4((DX9R$} zf4%PM2u+u}AZnUnkn>U+fi>L;1w%&PijtJac)*obd%U{C)(fb9-FcC}XWE7wAFSyI zLqdy%vhB0|=PPU~Ko>zLTy5Ulm+_zvOU+1HP-u14qJ$LqexqeQX)@MAniylYQf!gd z;p^T`#hStcmRt%w9AGr}*F8e9yz|XFL-0~^si>u=Y0^|iw}HZ%w6ZM;5$rwZ%EpH8 zgQzJUV`3mXsQ&X3hB_VNN+K-{N3+dafGfW=hV~H*8bPPZOqYO$r7-L1VaOn@I~9C^ z@!u|VS^c$Qcc0I$*BPEALn^?GR z(HSUEUDXv&heW$9JJ_r^FuDQ8b$5y#D(wzdc%B9SIIdqE7G0S*&}}Lt+q};UnAIIF zZQ}AN{f3i_f9ICCt-=t_d~e_dRrDSmo2D6&MDsyhAi~a&AwQRD$;oFFsaQ;26V&T^ zupFR@4Kl04u1jE*)f&k+V|qGU+EkVt2v*G2%_Gw)_RFT--#<=Yqv)rw}M zJ}EXYHKr6FL9@Cr1fzT#7bulgz%aX$fiH|iA0MNq(;>kMu41hY7ZS{c1al$5Tu87# z*aYk1q5PJJeU4}Ldo~=$GwTG;Y}{#{nH!NE&&=`69*}1ic%FY9&x}oG{t*5gPa+8Ay?zxi0-ltI+4n} zFmWX?;)IQfz^@1hND>qP-EtV7A(Gk=(~N$@D`&}_T&q;O#&YeqfkkE-sjagG6S#l~ z59>Fij0oZ{qwWe*nJ;9*=d~Z{kK|=QnYFlLz#h1lPXo!Db#CQhzZXQ5K9x zQOG0CrgRua!PFa$ZhP6|*6f}1o6P+C-Ory4CZowC`c14nZTn3ovn%~3ctL;s$`9wG z;oSF}-^BS%+y(vL;4INRjs(NUV~dkgL0@$zlI||D1H;DLmWk$=yFSq_4Em0%d*Y*Q z9F9c^Ya@#H6S$r$q`O4f@???9SYdc~2SlqtoWK^z>_C{gvgqJww_j)wa8@0W7c@m!o|Ymff2S(myj3wz@M6C9SU0wcgyQu@efP1bpN6^P2F9@`x@Zn} zXHOiWK$ zM2U!=Ni3v*JsVyqf8F~SgT=R2>O1x;eEoN}aQhPvN;rl6@cle@D{frKirae`x0!s_&lb19t;g@HS zaPvMp_=Kv6E9vMW_v#LJxzy$RPN$A8((rmXx`?BT{9frI=oRcXUBr#ZjxO@%PyY{f KBST|kQ~?0ZJb}sp literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity/otherresearchproduct_10.json.gz 
b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/otherresearchproduct_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity/otherresearchproduct_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/otherresearchproduct_10.json.gz diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/publication/update_datasource/publication_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/publication_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/publication/update_datasource/publication_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/publication_10.json.gz diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/software/software_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/software/software_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/preparedInfo.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/preparedInfo.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/preparedInfo.json.gz rename to 
dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/preparedInfo/preparedInfo.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/countrypropagation/sample/software/software_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/sample/software/software_10.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/countrypropagation/sample/software/software_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/countrypropagation/sample/software/software_10.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc/mergedOrcid_17.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc/mergedOrcid_17.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc/mergedOrcid_17.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc/mergedOrcid_17.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate/dataset_10.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset_10.json.gz 
b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate/dataset_10.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/twoupdates/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/twoupdates/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/twoupdates/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/twoupdates/dataset_10.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked/alreadyLinked.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked/alreadyLinked.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked/alreadyLinked.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked/alreadyLinked.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates/potentialUpdates.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates/potentialUpdates.json.gz similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates/potentialUpdates.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates/potentialUpdates.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates/potentialUpdates.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates/potentialUpdates.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates/potentialUpdates.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates/potentialUpdates.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates/potentialUpdates.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates/potentialUpdates.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates/potentialUpdates.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates/potentialUpdates.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json.gz similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/sample/dataset_10.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo/mergedResultCommunityList.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo/mergedResultCommunityList.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo/mergedResultCommunityList.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo/mergedResultCommunityList.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample/dataset_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample/dataset_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample/dataset_10.json.gz diff --git 
a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix/software_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix/software_10.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix/software_10.json.gz rename to 
dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix/software_10.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix/software_10.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix/software_10.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix/software_10.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix/software_10.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz similarity index 100% rename from 
dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked/alreadyLinked_20.json.gz diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz similarity index 100% rename from dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz rename to dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization/datasourceOrganization_28.json.gz diff --git a/dhp-workflows/dhp-propagation/pom.xml b/dhp-workflows/dhp-propagation/pom.xml deleted file mode 100644 index 9492fa7c5..000000000 --- 
a/dhp-workflows/dhp-propagation/pom.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - - dhp-workflows - eu.dnetlib.dhp - 1.2.1-SNAPSHOT - - 4.0.0 - - dhp-propagation - - - - org.apache.spark - spark-core_2.11 - - - org.apache.spark - spark-sql_2.11 - - - - eu.dnetlib.dhp - dhp-common - ${project.version} - - - eu.dnetlib.dhp - dhp-schemas - ${project.version} - - - org.apache.spark - spark-hive_2.11 - test - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml index 0467e618f..f99ea7aed 100644 --- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml @@ -482,7 +482,7 @@ - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + marks as belonging to communities the result collected from providers related to the organizations specified in the organizationCommunityMap executeOozieJob IIS diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index badd8ca8a..271c66939 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -22,8 +22,7 @@ dhp-actionmanager dhp-graph-mapper dhp-dedup-openaire - dhp-bulktag - dhp-propagation + dhp-enrichment dhp-graph-provision dhp-dedup-scholexplorer dhp-graph-provision-scholexplorer From c6b028f2af0c07671c5fe8c799896b87c36aa474 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 11 May 2020 17:38:08 +0200 Subject: [PATCH 02/31] code formatting --- .../dhp/schema/common/ModelSupport.java | 491 +++++++++--------- .../blacklist/PrepareMergedRelationJob.java | 13 - .../dhp/blacklist/ReadBlacklistFromDB.java | 3 +- .../SparkRemoveBlacklistedRelationJob.java | 26 +- .../dnetlib/dhp/blacklist/BlackListTest.java | 
1 + .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 16 +- .../dhp/bulktag/community/Community.java | 9 +- .../community/CommunityConfiguration.java | 20 +- .../CommunityConfigurationFactory.java | 26 +- .../dhp/bulktag/community/Constraint.java | 6 +- .../dhp/bulktag/community/Constraints.java | 14 +- .../dnetlib/dhp/bulktag/community/Pair.java | 4 +- .../dhp/bulktag/community/Provider.java | 8 +- .../community/QueryInformationSystem.java | 8 +- .../dhp/bulktag/community/ResultTagger.java | 18 +- .../community/SelectionConstraints.java | 9 +- .../bulktag/community/ZenodoCommunity.java | 5 +- .../bulktag/criteria/InterfaceAdapter.java | 4 +- .../dhp/bulktag/criteria/VerbResolver.java | 10 +- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 69 +-- .../CommunityConfigurationFactoryTest.java | 20 +- .../CountryPropagationJobTest.java | 71 +-- .../ProjectPropagationJobTest.java | 61 +-- .../ResultToCommunityJobTest.java | 7 +- .../ResultToOrganizationJobTest.java | 48 +- 25 files changed, 491 insertions(+), 476 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 7b0b9a1e2..fc85b1ac1 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.schema.common; import java.util.Map; @@ -65,216 +66,216 @@ public class ModelSupport { static { relationInverseMap - .put( - "personResult_authorship_isAuthorOf", new RelationInverse() - .setRelation("isAuthorOf") - .setInverse("hasAuthor") - .setRelType("personResult") - .setSubReltype("authorship")); + .put( + "personResult_authorship_isAuthorOf", new RelationInverse() + .setRelation("isAuthorOf") + .setInverse("hasAuthor") + .setRelType("personResult") + .setSubReltype("authorship")); relationInverseMap - .put( - "personResult_authorship_hasAuthor", new 
RelationInverse() - .setInverse("isAuthorOf") - .setRelation("hasAuthor") - .setRelType("personResult") - .setSubReltype("authorship")); + .put( + "personResult_authorship_hasAuthor", new RelationInverse() + .setInverse("isAuthorOf") + .setRelation("hasAuthor") + .setRelType("personResult") + .setSubReltype("authorship")); relationInverseMap - .put( - "projectOrganization_participation_isParticipant", new RelationInverse() - .setRelation("isParticipant") - .setInverse("hasParticipant") - .setRelType("projectOrganization") - .setSubReltype("participation")); + .put( + "projectOrganization_participation_isParticipant", new RelationInverse() + .setRelation("isParticipant") + .setInverse("hasParticipant") + .setRelType("projectOrganization") + .setSubReltype("participation")); relationInverseMap - .put( - "projectOrganization_participation_hasParticipant", new RelationInverse() - .setInverse("isParticipant") - .setRelation("hasParticipant") - .setRelType("projectOrganization") - .setSubReltype("participation")); + .put( + "projectOrganization_participation_hasParticipant", new RelationInverse() + .setInverse("isParticipant") + .setRelation("hasParticipant") + .setRelType("projectOrganization") + .setSubReltype("participation")); relationInverseMap - .put( - "resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse() - .setRelation("hasAuthorInstitution") - .setInverse("isAuthorInstitutionOf") - .setRelType("resultOrganization") - .setSubReltype("affiliation")); + .put( + "resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse() + .setRelation("hasAuthorInstitution") + .setInverse("isAuthorInstitutionOf") + .setRelType("resultOrganization") + .setSubReltype("affiliation")); relationInverseMap - .put( - "resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse() - .setInverse("hasAuthorInstitution") - .setRelation("isAuthorInstitutionOf") - .setRelType("resultOrganization") - .setSubReltype("affiliation")); + .put( + 
"resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse() + .setInverse("hasAuthorInstitution") + .setRelation("isAuthorInstitutionOf") + .setRelType("resultOrganization") + .setSubReltype("affiliation")); relationInverseMap - .put( - "organizationOrganization_dedup_merges", new RelationInverse() - .setRelation("merges") - .setInverse("isMergedIn") - .setRelType("organizationOrganization") - .setSubReltype("dedup")); + .put( + "organizationOrganization_dedup_merges", new RelationInverse() + .setRelation("merges") + .setInverse("isMergedIn") + .setRelType("organizationOrganization") + .setSubReltype("dedup")); relationInverseMap - .put( - "organizationOrganization_dedup_isMergedIn", new RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("organizationOrganization") - .setSubReltype("dedup")); + .put( + "organizationOrganization_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("organizationOrganization") + .setSubReltype("dedup")); relationInverseMap - .put( - "organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - .setRelation("isSimilarTo") - .setRelType("organizationOrganization") - .setSubReltype("dedupSimilarity")); + .put( + "organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("organizationOrganization") + .setSubReltype("dedupSimilarity")); relationInverseMap - .put( - "resultProject_outcome_isProducedBy", new RelationInverse() - .setRelation("isProducedBy") - .setInverse("produces") - .setRelType("resultProject") - .setSubReltype("outcome")); + .put( + "resultProject_outcome_isProducedBy", new RelationInverse() + .setRelation("isProducedBy") + .setInverse("produces") + .setRelType("resultProject") + .setSubReltype("outcome")); relationInverseMap - .put( - "resultProject_outcome_produces", new 
RelationInverse() - .setInverse("isProducedBy") - .setRelation("produces") - .setRelType("resultProject") - .setSubReltype("outcome")); + .put( + "resultProject_outcome_produces", new RelationInverse() + .setInverse("isProducedBy") + .setRelation("produces") + .setRelType("resultProject") + .setSubReltype("outcome")); relationInverseMap - .put( - "projectPerson_contactPerson_isContact", new RelationInverse() - .setRelation("isContact") - .setInverse("hasContact") - .setRelType("projectPerson") - .setSubReltype("contactPerson")); + .put( + "projectPerson_contactPerson_isContact", new RelationInverse() + .setRelation("isContact") + .setInverse("hasContact") + .setRelType("projectPerson") + .setSubReltype("contactPerson")); relationInverseMap - .put( - "projectPerson_contactPerson_hasContact", new RelationInverse() - .setInverse("isContact") - .setRelation("hasContact") - .setRelType("personPerson") - .setSubReltype("coAuthorship")); + .put( + "projectPerson_contactPerson_hasContact", new RelationInverse() + .setInverse("isContact") + .setRelation("hasContact") + .setRelType("personPerson") + .setSubReltype("coAuthorship")); relationInverseMap - .put( - "personPerson_coAuthorship_isCoauthorOf", new RelationInverse() - .setInverse("isCoAuthorOf") - .setRelation("isCoAuthorOf") - .setRelType("personPerson") - .setSubReltype("coAuthorship")); + .put( + "personPerson_coAuthorship_isCoauthorOf", new RelationInverse() + .setInverse("isCoAuthorOf") + .setRelation("isCoAuthorOf") + .setRelType("personPerson") + .setSubReltype("coAuthorship")); relationInverseMap - .put( - "personPerson_dedup_merges", new RelationInverse() - .setInverse("isMergedIn") - .setRelation("merges") - .setRelType("personPerson") - .setSubReltype("dedup")); + .put( + "personPerson_dedup_merges", new RelationInverse() + .setInverse("isMergedIn") + .setRelation("merges") + .setRelType("personPerson") + .setSubReltype("dedup")); relationInverseMap - .put( - "personPerson_dedup_isMergedIn", new 
RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("personPerson") - .setSubReltype("dedup")); + .put( + "personPerson_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("personPerson") + .setSubReltype("dedup")); relationInverseMap - .put( - "personPerson_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - .setRelation("isSimilarTo") - .setRelType("personPerson") - .setSubReltype("dedupSimilarity")); + .put( + "personPerson_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("personPerson") + .setSubReltype("dedupSimilarity")); relationInverseMap - .put( - "datasourceOrganization_provision_isProvidedBy", new RelationInverse() - .setInverse("provides") - .setRelation("isProvidedBy") - .setRelType("datasourceOrganization") - .setSubReltype("provision")); + .put( + "datasourceOrganization_provision_isProvidedBy", new RelationInverse() + .setInverse("provides") + .setRelation("isProvidedBy") + .setRelType("datasourceOrganization") + .setSubReltype("provision")); relationInverseMap - .put( - "datasourceOrganization_provision_provides", new RelationInverse() - .setInverse("isProvidedBy") - .setRelation("provides") - .setRelType("datasourceOrganization") - .setSubReltype("provision")); + .put( + "datasourceOrganization_provision_provides", new RelationInverse() + .setInverse("isProvidedBy") + .setRelation("provides") + .setRelType("datasourceOrganization") + .setSubReltype("provision")); relationInverseMap - .put( - "resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("isAmongTopNSimilarDocuments") - .setRelation("hasAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); + .put( + "resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("isAmongTopNSimilarDocuments") + 
.setRelation("hasAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); relationInverseMap - .put( - "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("hasAmongTopNSimilarDocuments") - .setRelation("isAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); + .put( + "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("hasAmongTopNSimilarDocuments") + .setRelation("isAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); relationInverseMap - .put( - "resultResult_relationship_isRelatedTo", new RelationInverse() - .setInverse("isRelatedTo") - .setRelation("isRelatedTo") - .setRelType("resultResult") - .setSubReltype("relationship")); + .put( + "resultResult_relationship_isRelatedTo", new RelationInverse() + .setInverse("isRelatedTo") + .setRelation("isRelatedTo") + .setRelType("resultResult") + .setSubReltype("relationship")); relationInverseMap - .put( - "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("hasAmongTopNSimilarDocuments") - .setRelation("isAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); + .put( + "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("hasAmongTopNSimilarDocuments") + .setRelation("isAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); relationInverseMap - .put( - "resultResult_supplement_isSupplementTo", new RelationInverse() - .setInverse("isSupplementedBy") - .setRelation("isSupplementTo") - .setRelType("resultResult") - .setSubReltype("supplement")); + .put( + "resultResult_supplement_isSupplementTo", new RelationInverse() + .setInverse("isSupplementedBy") + .setRelation("isSupplementTo") + .setRelType("resultResult") + .setSubReltype("supplement")); relationInverseMap - .put( - 
"resultResult_supplement_isSupplementedBy", new RelationInverse() - .setInverse("isSupplementTo") - .setRelation("isSupplementedBy") - .setRelType("resultResult") - .setSubReltype("supplement")); + .put( + "resultResult_supplement_isSupplementedBy", new RelationInverse() + .setInverse("isSupplementTo") + .setRelation("isSupplementedBy") + .setRelType("resultResult") + .setSubReltype("supplement")); relationInverseMap - .put( - "resultResult_part_isPartOf", new RelationInverse() - .setInverse("hasPart") - .setRelation("isPartOf") - .setRelType("resultResult") - .setSubReltype("part")); + .put( + "resultResult_part_isPartOf", new RelationInverse() + .setInverse("hasPart") + .setRelation("isPartOf") + .setRelType("resultResult") + .setSubReltype("part")); relationInverseMap - .put( - "resultResult_part_hasPart", new RelationInverse() - .setInverse("isPartOf") - .setRelation("hasPart") - .setRelType("resultResult") - .setSubReltype("part")); + .put( + "resultResult_part_hasPart", new RelationInverse() + .setInverse("isPartOf") + .setRelation("hasPart") + .setRelType("resultResult") + .setSubReltype("part")); relationInverseMap - .put( - "resultResult_dedup_merges", new RelationInverse() - .setInverse("isMergedIn") - .setRelation("merges") - .setRelType("resultResult") - .setSubReltype("dedup")); + .put( + "resultResult_dedup_merges", new RelationInverse() + .setInverse("isMergedIn") + .setRelation("merges") + .setRelType("resultResult") + .setSubReltype("dedup")); relationInverseMap - .put( - "resultResult_dedup_isMergedIn", new RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("resultResult") - .setSubReltype("dedup")); + .put( + "resultResult_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("resultResult") + .setSubReltype("dedup")); relationInverseMap - .put( - "resultResult_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - 
.setRelation("isSimilarTo") - .setRelType("resultResult") - .setSubReltype("dedupSimilarity")); + .put( + "resultResult_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("resultResult") + .setSubReltype("dedupSimilarity")); } @@ -293,7 +294,7 @@ public class ModelSupport { * @return True if X is a subclass of Y */ public static Boolean isSubClass( - X subClazzObject, Y superClazzObject) { + X subClazzObject, Y superClazzObject) { return isSubClass(subClazzObject.getClass(), superClazzObject.getClass()); } @@ -307,7 +308,7 @@ public class ModelSupport { * @return True if X is a subclass of Y */ public static Boolean isSubClass( - X subClazzObject, Class superClazz) { + X subClazzObject, Class superClazz) { return isSubClass(subClazzObject.getClass(), superClazz); } @@ -321,7 +322,7 @@ public class ModelSupport { * @return True if X is a subclass of Y */ public static Boolean isSubClass( - Class subClazz, Class superClazz) { + Class subClazz, Class superClazz) { return superClazz.isAssignableFrom(subClazz); } @@ -333,32 +334,32 @@ public class ModelSupport { */ public static Class[] getOafModelClasses() { return new Class[] { - Author.class, - Context.class, - Country.class, - DataInfo.class, - Dataset.class, - Datasource.class, - ExternalReference.class, - ExtraInfo.class, - Field.class, - GeoLocation.class, - Instance.class, - Journal.class, - KeyValue.class, - Oaf.class, - OafEntity.class, - OAIProvenance.class, - Organization.class, - OriginDescription.class, - OtherResearchProduct.class, - Project.class, - Publication.class, - Qualifier.class, - Relation.class, - Result.class, - Software.class, - StructuredProperty.class + Author.class, + Context.class, + Country.class, + DataInfo.class, + Dataset.class, + Datasource.class, + ExternalReference.class, + ExtraInfo.class, + Field.class, + GeoLocation.class, + Instance.class, + Journal.class, + KeyValue.class, + Oaf.class, + OafEntity.class, 
+ OAIProvenance.class, + Organization.class, + OriginDescription.class, + OtherResearchProduct.class, + Project.class, + Publication.class, + Qualifier.class, + Relation.class, + Result.class, + Software.class, + StructuredProperty.class }; } @@ -372,10 +373,10 @@ public class ModelSupport { public static String getScheme(final String sourceType, final String targetType) { return String - .format( - schemeTemplate, - entityMapping.get(EntityType.valueOf(sourceType)).name(), - entityMapping.get(EntityType.valueOf(targetType)).name()); + .format( + schemeTemplate, + entityMapping.get(EntityType.valueOf(sourceType)).name(), + entityMapping.get(EntityType.valueOf(targetType)).name()); } public static Function idFn() { @@ -390,38 +391,38 @@ public class ModelSupport { private static String idFnForRelation(T t) { Relation r = (Relation) t; return Optional - .ofNullable(r.getSource()) - .map( - source -> Optional - .ofNullable(r.getTarget()) - .map( - target -> Optional - .ofNullable(r.getRelType()) - .map( - relType -> Optional - .ofNullable(r.getSubRelType()) - .map( - subRelType -> Optional - .ofNullable(r.getRelClass()) - .map( - relClass -> String - .join( - source, - target, - relType, - subRelType, - relClass)) - .orElse( - String - .join( - source, - target, - relType, - subRelType))) - .orElse(String.join(source, target, relType))) - .orElse(String.join(source, target))) - .orElse(source)) - .orElse(null); + .ofNullable(r.getSource()) + .map( + source -> Optional + .ofNullable(r.getTarget()) + .map( + target -> Optional + .ofNullable(r.getRelType()) + .map( + relType -> Optional + .ofNullable(r.getSubRelType()) + .map( + subRelType -> Optional + .ofNullable(r.getRelClass()) + .map( + relClass -> String + .join( + source, + target, + relType, + subRelType, + relClass)) + .orElse( + String + .join( + source, + target, + relType, + subRelType))) + .orElse(String.join(source, target, relType))) + .orElse(String.join(source, target))) + .orElse(source)) + 
.orElse(null); } private static String idFnForOafEntity(T t) { diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java index d5c2b518a..0ef59e8c2 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java @@ -73,19 +73,6 @@ public class PrepareMergedRelationJob { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); -// relation.createOrReplaceTempView("relation"); -// -// spark -// .sql( -// "Select * from relation " + -// "where relclass = 'merges' " + -// "and datainfo.deletedbyinference = false") -// .as(Encoders.bean(Relation.class)) -// .toJSON() -// .write() -// .mode(SaveMode.Overwrite) -// .option("compression", "gzip") -// .text(outputPath); } public static org.apache.spark.sql.Dataset readRelations( diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 704cab375..2caa66db4 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -65,8 +65,7 @@ public class ReadBlacklistFromDB implements Closeable { } } - public void execute(final String sql, final Function> producer) - throws Exception { + public void execute(final String sql, final Function> producer) throws Exception { final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r)); diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java 
b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index c5104058c..86587bfc9 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -84,7 +84,7 @@ public class SparkRemoveBlacklistedRelationJob { .joinWith( mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")), "left_outer") - .map(c -> { + .map((MapFunction, Relation>) c -> { Optional .ofNullable(c._2()) .ifPresent(mr -> c._1().setSource(mr.getSource())); @@ -95,7 +95,7 @@ public class SparkRemoveBlacklistedRelationJob { .joinWith( mergesRelation, dedupSource.col("target").equalTo(mergesRelation.col("target")), "left_outer") - .map(c -> { + .map((MapFunction, Relation>) c -> { Optional .ofNullable(c._2()) .ifPresent(mr -> c._1().setTarget(mr.getSource())); @@ -107,7 +107,6 @@ public class SparkRemoveBlacklistedRelationJob { .mode(SaveMode.Overwrite) .json(blacklistPath + "/deduped"); - inputRelation .joinWith( dedupBL, (inputRelation @@ -118,26 +117,23 @@ public class SparkRemoveBlacklistedRelationJob { .col("target") .equalTo(dedupBL.col("target")))), "left_outer") - .map(c -> { - Relation ir = c._1(); - Optional obl = Optional.ofNullable(c._2()); - if (obl.isPresent()) { - if (ir.equals(obl.get())) { - return null; + .map((MapFunction, Relation>) c -> { + Relation ir = c._1(); + Optional obl = Optional.ofNullable(c._2()); + if (obl.isPresent()) { + if (ir.equals(obl.get())) { + return null; + } } - } - return ir; - - }, Encoders.bean(Relation.class)) + return ir; + }, Encoders.bean(Relation.class)) .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); - } - public static org.apache.spark.sql.Dataset readRelations( SparkSession spark, String inputPath) { return spark diff --git 
a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 2d6b1061b..bbfd15674 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -19,6 +19,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.schema.oaf.Relation; public class BlackListTest { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index e3d74ef3e..75d85e2ba 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -1,11 +1,10 @@ package eu.dnetlib.dhp.bulktag; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bulktag.community.*; -import eu.dnetlib.dhp.schema.oaf.Result; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -16,9 +15,12 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bulktag.community.*; +import eu.dnetlib.dhp.schema.oaf.Result; public class 
SparkBulkTagJob { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index d492b848e..0f45d3beb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -1,14 +1,15 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.google.gson.Gson; + /** Created by miriam on 01/08/2018. */ public class Community implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 4e5b9fc9f..29ddde15f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -1,15 +1,6 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; -import eu.dnetlib.dhp.bulktag.criteria.Selection; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; @@ -17,6 +8,17 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import 
org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; +import eu.dnetlib.dhp.bulktag.criteria.Selection; + /** Created by miriam on 02/08/2018. */ public class CommunityConfiguration implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index dc83497c3..607315f3f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -1,14 +1,11 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; -import eu.dnetlib.dhp.bulktag.criteria.Selection; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -17,10 +14,15 @@ import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +import 
eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; +import eu.dnetlib.dhp.bulktag.criteria.Selection; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; /** Created by miriam on 03/08/2018. */ public class CommunityConfigurationFactory { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index 8e28a7a5f..e0856ae8f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.bulktag.community; -import eu.dnetlib.dhp.bulktag.criteria.Selection; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; - import java.io.Serializable; import java.lang.reflect.InvocationTargetException; +import eu.dnetlib.dhp.bulktag.criteria.Selection; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; + public class Constraint implements Serializable { private String verb; private String field; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java index eace3bc35..b56dfaaa3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java @@ -1,12 +1,6 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import com.google.gson.reflect.TypeToken; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.io.Serializable; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Type; 
@@ -14,6 +8,14 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; + +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; + /** Created by miriam on 02/08/2018. */ public class Constraints implements Serializable { private static final Log log = LogFactory.getLog(Constraints.class); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java index 1130a0770..50e1836fa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; - import java.io.Serializable; +import com.google.gson.Gson; + /** Created by miriam on 03/08/2018. */ public class Pair implements Serializable { private A fst; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java index c4362610e..b9c37f4dc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import java.io.Serializable; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.Node; -import java.io.Serializable; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 01/08/2018. 
*/ public class Provider implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 43eb40940..7ec2f916f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.bulktag.community; +import java.util.List; + +import org.dom4j.DocumentException; + import com.google.common.base.Joiner; + import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import org.dom4j.DocumentException; - -import java.util.List; public class QueryInformationSystem { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index fd4f5497a..f5a985d15 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -1,19 +1,21 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; -import eu.dnetlib.dhp.schema.oaf.*; -import org.apache.commons.lang3.StringUtils; +import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; import 
java.util.stream.Stream; -import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import org.apache.commons.lang3.StringUtils; + +import com.google.gson.Gson; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; + +import eu.dnetlib.dhp.schema.oaf.*; /** Created by miriam on 02/08/2018. */ public class ResultTagger implements Serializable { @@ -50,7 +52,7 @@ public class ResultTagger implements Serializable { } public R enrichContextCriteria( - final R result, final CommunityConfiguration conf, final Map criteria) { + final R result, final CommunityConfiguration conf, final Map criteria) { // } // public Result enrichContextCriteria(final Result result, final CommunityConfiguration diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 28674d9ef..71ff61d1b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -1,16 +1,17 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import com.google.gson.reflect.TypeToken; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; - import java.io.Serializable; import java.lang.reflect.Type; import java.util.Collection; import java.util.List; import java.util.Map; +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; + +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; + public class SelectionConstraints implements Serializable { private List criteria; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java index eb0577ffc..bc6b75fba 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; +import java.io.Serializable; + import org.dom4j.Node; -import java.io.Serializable; +import com.google.gson.Gson; /** Created by miriam on 01/08/2018. */ public class ZenodoCommunity implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java index 2bc1ab3cf..e9b948b2b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.bulktag.criteria; -import com.google.gson.*; - import java.lang.reflect.Type; +import com.google.gson.*; + public class InterfaceAdapter implements JsonSerializer, JsonDeserializer { private static final String CLASSNAME = "CLASSNAME"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 19d6c4615..fe46c6936 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -1,16 +1,16 @@ package eu.dnetlib.dhp.bulktag.criteria; -import io.github.classgraph.ClassGraph; -import io.github.classgraph.ClassInfo; -import io.github.classgraph.ClassInfoList; -import 
io.github.classgraph.ScanResult; - import java.io.Serializable; import java.lang.reflect.InvocationTargetException; import java.util.Map; import java.util.stream.Collectors; +import io.github.classgraph.ClassGraph; +import io.github.classgraph.ClassInfo; +import io.github.classgraph.ClassInfoList; +import io.github.classgraph.ScanResult; + public class VerbResolver implements Serializable { private Map> map = null; // = new HashMap<>(); private final ClassGraph classgraph = new ClassGraph(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index f20678f7b..72e0a63fa 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -1,11 +1,12 @@ package eu.dnetlib.dhp.bulktag; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Software; +import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -18,15 +19,15 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR; +import eu.dnetlib.dhp.schema.oaf.Dataset; 
+import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Software; public class BulkTagJobTest { @@ -34,12 +35,11 @@ public class BulkTagJobTest { public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp"; - public static final String pathMap = - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}"; + public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}"; private static SparkSession spark; @@ -97,7 +97,8 @@ public class BulkTagJobTest { new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), "-taggingConf", taggingConf, "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/dataset", @@ -129,8 +130,8 @@ public class BulkTagJobTest { @Test public void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") - .getPath(); + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") + .getPath(); final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -139,7 +140,7 @@ public class BulkTagJobTest { 
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/dataset", "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, "-pathMap", pathMap @@ -225,9 +226,9 @@ public class BulkTagJobTest { @Test public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() - .getResource( - "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") + .getPath(); final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -236,7 +237,7 @@ public class BulkTagJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/dataset", "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, "-pathMap", pathMap @@ -307,8 +308,8 @@ public class BulkTagJobTest { @Test public void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") - .getPath(); + .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") + .getPath(); SparkBulkTagJob .main( new String[] { @@ -379,9 +380,9 @@ public class BulkTagJobTest { @Test public void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() - .getResource( - "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") + .getPath(); SparkBulkTagJob .main( new String[] { @@ -501,8 +502,8 @@ public class 
BulkTagJobTest { @Test public void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") - .getPath(); + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") + .getPath(); SparkBulkTagJob .main( new String[] { @@ -726,9 +727,9 @@ public class BulkTagJobTest { public void bulktagDatasourcewithConstraintsTest() throws Exception { final String sourcePath = getClass() - .getResource( - "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") + .getPath(); SparkBulkTagJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index 056c3345c..aaf670fd7 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -1,21 +1,23 @@ package eu.dnetlib.dhp.bulktag; -import com.google.gson.Gson; -import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; -import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory; -import eu.dnetlib.dhp.bulktag.community.Constraint; -import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.util.*; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.io.IOException; -import 
java.lang.reflect.InvocationTargetException; -import java.util.*; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; +import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory; +import eu.dnetlib.dhp.bulktag.community.Constraint; +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 03/08/2018. */ public class CommunityConfigurationFactoryTest { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index b62238089..88ad43b6b 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -103,7 +103,8 @@ public class CountryPropagationJobTest { Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count()); Dataset countryExploded = verificationDs - .flatMap((FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) + .flatMap( + (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) .map((MapFunction) c -> c.getClassid(), Encoders.STRING()); Assertions.assertEquals(9, countryExploded.count()); @@ -123,10 +124,10 @@ public class CountryPropagationJobTest { country_list .stream() .forEach( - c -> prova - .add( - new Tuple2<>( - row.getId(), c.getClassid()))); + c -> prova + .add( + new Tuple2<>( + row.getId(), c.getClassid()))); return prova.iterator(); }, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); @@ -178,20 +179,20 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassname = verificationDs .flatMap( - (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - 
List country_list = row.getCountry(); - country_list - .stream() - .forEach( - c -> prova - .add( - new Tuple2<>( - row.getId(), - c.getClassname()))); - return prova.iterator(); - }, - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + (FlatMapFunction>) row -> { + List> prova = new ArrayList(); + List country_list = row.getCountry(); + country_list + .stream() + .forEach( + c -> prova + .add( + new Tuple2<>( + row.getId(), + c.getClassname()))); + return prova.iterator(); + }, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); countryExplodedWithCountryclassname.show(false); Assertions @@ -239,22 +240,22 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryProvenance = verificationDs .flatMap( - (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() - .forEach( - c -> prova - .add( - new Tuple2<>( - row.getId(), - c - .getDataInfo() - .getInferenceprovenance()))); - return prova.iterator(); - }, - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + (FlatMapFunction>) row -> { + List> prova = new ArrayList(); + List country_list = row.getCountry(); + country_list + .stream() + .forEach( + c -> prova + .add( + new Tuple2<>( + row.getId(), + c + .getDataInfo() + .getInferenceprovenance()))); + return prova.iterator(); + }, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); Assertions .assertEquals( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index ff63753b8..abed028e1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -72,14 +72,15 @@ public class ProjectPropagationJobTest { public void 
NoUpdateTest() throws Exception { final String potentialUpdateDate = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToProjectThroughSemRelJob.main( + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -107,14 +108,15 @@ public class ProjectPropagationJobTest { @Test public void UpdateTenTest() throws Exception { final String potentialUpdatePath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToProjectThroughSemRelJob.main( + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -143,8 +145,8 @@ public class ProjectPropagationJobTest { .assertEquals( 5, verificationDs - .filter((FilterFunction) r -> - r.getSource().startsWith("50") + .filter( + (FilterFunction) r -> r.getSource().startsWith("50") && r.getTarget().startsWith("40") && r.getRelClass().equals("isProducedBy")) .count()); @@ -152,8 +154,8 @@ public class ProjectPropagationJobTest { .assertEquals( 5, verificationDs - .filter((FilterFunction) r -> - r.getSource().startsWith("40") 
+ .filter( + (FilterFunction) r -> r.getSource().startsWith("40") && r.getTarget().startsWith("50") && r.getRelClass().equals("produces")) .count()); @@ -178,14 +180,15 @@ public class ProjectPropagationJobTest { @Test public void UpdateMixTest() throws Exception { final String potentialUpdatepath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToProjectThroughSemRelJob.main( + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -217,8 +220,8 @@ public class ProjectPropagationJobTest { .assertEquals( 4, verificationDs - .filter((FilterFunction) r -> - r.getSource().startsWith("50") + .filter( + (FilterFunction) r -> r.getSource().startsWith("50") && r.getTarget().startsWith("40") && r.getRelClass().equals("isProducedBy")) .count()); @@ -226,8 +229,8 @@ public class ProjectPropagationJobTest { .assertEquals( 4, verificationDs - .filter((FilterFunction) r -> - r.getSource().startsWith("40") + .filter( + (FilterFunction) r -> r.getSource().startsWith("40") && r.getTarget().startsWith("50") && r.getRelClass().equals("produces")) .count()); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index 20b20d4ed..d739516fc 100644 --- 
a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -67,9 +67,10 @@ public class ResultToCommunityJobTest { @Test public void testSparkResultToCommunityFromOrganizationJob() throws Exception { final String preparedInfoPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") - .getPath(); - SparkResultToCommunityFromOrganizationJob.main( + .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") + .getPath(); + SparkResultToCommunityFromOrganizationJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index 30be118d1..435b76605 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -69,15 +69,17 @@ public class ResultToOrganizationJobTest { @Test public void NoUpdateTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") - .getPath(); + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") + .getPath(); final String datasourceOrganizationPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization") - .getPath(); + .getResource( + 
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToOrganizationFromIstRepoJob.main( + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -109,15 +111,17 @@ public class ResultToOrganizationJobTest { @Test public void UpdateNoMixTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") - .getPath(); + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") + .getPath(); final String datasourceOrganizationPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToOrganizationFromIstRepoJob.main( + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -172,15 +176,17 @@ public class ResultToOrganizationJobTest { @Test public void UpdateMixTest() throws Exception { final String sourcePath = getClass() - 
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") - .getPath(); + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") + .getPath(); final String datasourceOrganizationPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToOrganizationFromIstRepoJob.main( + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), From ad63effb4e078fb99b2471d98c964f2bbba3c1f8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 11 May 2020 17:48:22 +0200 Subject: [PATCH 03/31] removed deletion of working dir --- .../resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml index f71c085b2..7cbf166a1 100644 --- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml @@ -30,7 +30,6 @@ - From f9a62ba63b21e1687623a0f58abeb15f74db95a1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 11 May 2020 18:16:39 +0200 Subject: [PATCH 04/31] added wf nodes to copy entities to the output path --- .../dhp/blacklist/oozie_app/workflow.xml | 91 
++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml index f71c085b2..397a48f09 100644 --- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml @@ -34,10 +34,99 @@ - + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/publication + ${nameNode}/${outputPath}/publication + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/dataset + ${nameNode}/${outputPath}/dataset + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/otherresearchproduct + ${nameNode}/${outputPath}/otherresearchproduct + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/software + ${nameNode}/${outputPath}/software + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + ${jobTracker} From 527e8169a80addd181e8cabbe1866e2891a3fbf9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 11 May 2020 18:17:05 +0200 Subject: [PATCH 05/31] adjusted paths pointing to test configurations, cleanup --- .../CommunityConfigurationFactoryTest.java | 88 +------------------ 1 file changed, 2 insertions(+), 86 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index 
aaf670fd7..c4beac4e6 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -11,12 +11,9 @@ import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import com.google.gson.Gson; - import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory; import eu.dnetlib.dhp.bulktag.community.Constraint; -import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 03/08/2018. */ @@ -30,7 +27,7 @@ public class CommunityConfigurationFactoryTest { .toString( getClass() .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/community_configuration.xml")); + "/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration.xml")); final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml); Assertions.assertEquals(5, cc.size()); cc @@ -57,7 +54,7 @@ public class CommunityConfigurationFactoryTest { .toString( getClass() .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.xml")); + "/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml")); final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml); Map> param = new HashMap<>(); param.put("author", new ArrayList<>(Collections.singletonList("Pippo Pippi"))); @@ -82,85 +79,4 @@ public class CommunityConfigurationFactoryTest { Assertions.assertEquals("dariah", comm.get(0)); } - @Test - public void test4() throws DocumentException, IOException { - final CommunityConfiguration cc = CommunityConfigurationFactory - .fromJson( - IOUtils - .toString( - getClass() - .getResourceAsStream( - 
"/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.json"))); - cc.toString(); - } - - @Test - public void test5() throws IOException, DocumentException { - - // final CommunityConfiguration cc = - // CommunityConfigurationFactory.newInstance(IOUtils.toString(getClass().getResourceAsStream("test.xml"))); - final CommunityConfiguration cc = CommunityConfigurationFactory - .fromJson( - IOUtils - .toString( - getClass() - .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/community_configuration.json"))); - - System.out.println(cc.toJson()); - } - - @Test - public void test6() { - String json = "{\"criteria\":[{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}]}"; - - String step1 = "{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}"; - - Constraint c = new Gson().fromJson(step1, Constraint.class); - // - // String step2 = - // "{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}"; - // - // ConstraintEncapsulator ce = new - // Gson().fromJson(step2,ConstraintEncapsulator.class); - // - // - // String step3 = - // "{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}"; - // - // Constraints cons = new Gson().fromJson(step3,Constraints.class); - // - // String step4 = - // "{\"criteria\":[{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}]}"; - // - // ConstraintsList cl = new Gson().fromJson(step4,ConstraintsList.class); - // - // String step5 = - // "{\"cl\":{\"criteria\":[{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}]}}"; - SelectionConstraints sl = new Gson().fromJson(json, SelectionConstraints.class); - } - - @Test - public void test7() throws IOException { - final CommunityConfiguration cc = CommunityConfigurationFactory - .fromJson( - IOUtils - .toString( - getClass() - 
.getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/tagging_conf.json"))); - - System.out.println(cc.toJson()); - } - - @Test - public void temporaneo() throws Exception { - String xml = IOUtils - .toString( - getClass() - .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml")); - final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml); - System.out.println(cc.toJson()); - } } From 8ffc050b8ac8ee79201e4f6b2fea78efbd19ba66 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 12 May 2020 10:01:09 +0200 Subject: [PATCH 06/31] fixed problem in communityconfigurationfactory test --- .../CommunityConfigurationFactoryTest.java | 87 +------------------ .../community_configuration_selcrit.xml | 24 ++--- 2 files changed, 15 insertions(+), 96 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index aaf670fd7..3d08b5915 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -30,7 +30,7 @@ public class CommunityConfigurationFactoryTest { .toString( getClass() .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/community_configuration.xml")); + "/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration.xml")); final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml); Assertions.assertEquals(5, cc.size()); cc @@ -57,7 +57,7 @@ public class CommunityConfigurationFactoryTest { .toString( getClass() .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.xml")); + "/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml")); final 
CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml); Map> param = new HashMap<>(); param.put("author", new ArrayList<>(Collections.singletonList("Pippo Pippi"))); @@ -81,86 +81,5 @@ public class CommunityConfigurationFactoryTest { Assertions.assertEquals(1, comm.size()); Assertions.assertEquals("dariah", comm.get(0)); } - - @Test - public void test4() throws DocumentException, IOException { - final CommunityConfiguration cc = CommunityConfigurationFactory - .fromJson( - IOUtils - .toString( - getClass() - .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.json"))); - cc.toString(); - } - - @Test - public void test5() throws IOException, DocumentException { - - // final CommunityConfiguration cc = - // CommunityConfigurationFactory.newInstance(IOUtils.toString(getClass().getResourceAsStream("test.xml"))); - final CommunityConfiguration cc = CommunityConfigurationFactory - .fromJson( - IOUtils - .toString( - getClass() - .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/community_configuration.json"))); - - System.out.println(cc.toJson()); - } - - @Test - public void test6() { - String json = "{\"criteria\":[{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}]}"; - - String step1 = "{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}"; - - Constraint c = new Gson().fromJson(step1, Constraint.class); - // - // String step2 = - // "{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}"; - // - // ConstraintEncapsulator ce = new - // Gson().fromJson(step2,ConstraintEncapsulator.class); - // - // - // String step3 = - // "{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}"; - // - // Constraints cons = new Gson().fromJson(step3,Constraints.class); - // - // String step4 = - // 
"{\"criteria\":[{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}]}"; - // - // ConstraintsList cl = new Gson().fromJson(step4,ConstraintsList.class); - // - // String step5 = - // "{\"cl\":{\"criteria\":[{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}]}}"; - SelectionConstraints sl = new Gson().fromJson(json, SelectionConstraints.class); - } - - @Test - public void test7() throws IOException { - final CommunityConfiguration cc = CommunityConfigurationFactory - .fromJson( - IOUtils - .toString( - getClass() - .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/tagging_conf.json"))); - - System.out.println(cc.toJson()); - } - - @Test - public void temporaneo() throws Exception { - String xml = IOUtils - .toString( - getClass() - .getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml")); - final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml); - System.out.println(cc.toJson()); - } + } diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml index 89ace0e5f..cd5ea38d0 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/community_configuration_selcrit.xml @@ -35,7 +35,7 @@ SDG9 - Industry innovation and infrastructure SDG16 - Peace justice and strong institutions - + 123 @@ -50,7 +50,7 @@ - + @@ -74,7 +74,7 @@ brain magnetic resonance imaging brain abnormalities - + re3data_____::5b9bf9171d92df854cf3c520692e9122 @@ -95,7 +95,7 @@ doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a - + @@ -106,12 +106,12 @@ 
aqua sea - + re3data_____::9633d1e8c4309c833c2c442abeb0cfeb - + @@ -134,7 +134,7 @@ food distribution forestry - + opendoar____::1a551829d50f1400b0dab21fdd969c04 @@ -159,30 +159,30 @@ opendoar____::87ae6fb631f7c8a627e8e28785d9992d - + oac_clarin - + re3data_____::a507cdacc5bbcc08761c92185dee5cab - + oaa_dariah - + openaire____::1cfdb2e14977f31a98e0118283401f32 {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} - + dimpo From 7387f3449a16111ec7e492a7c3fcf43c35924898 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 12 May 2020 11:27:38 +0200 Subject: [PATCH 07/31] changed the route to find the verb resolver classes --- .../java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index fe46c6936..3d0db2063 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -22,12 +22,12 @@ public class VerbResolver implements Serializable { .verbose() // If you want to enable logging to stderr .enableAllInfo() // Scan classes, methods, fields, annotations .whitelistPackages( - "eu.dnetlib.dhp.selectioncriteria") // Scan com.xyz and subpackages + "eu.dnetlib.dhp.bulktag.criteria") // Scan com.xyz and subpackages .scan()) { // Perform the scan and return a ScanResult ClassInfoList routeClassInfoList = scanResult .getClassesWithAnnotation( - "eu.dnetlib.dhp.selectioncriteria.VerbClass"); + "eu.dnetlib.dhp.bulktag.criteria.VerbClass"); this.map = routeClassInfoList .stream() From f8aef6161a3001c14207e57fae15330ad4e25afc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 12 May 2020 11:28:07 +0200 Subject: [PATCH 08/31] minor 
modification --- .../dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index 71252dd82..ca737b79f 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -81,5 +81,5 @@ public class CommunityConfigurationFactoryTest { Assertions.assertEquals(1, comm.size()); Assertions.assertEquals("dariah", comm.get(0)); } - + } From 14979f299ebe2641f0b8906678eb617af66c5767 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 12 May 2020 11:28:38 +0200 Subject: [PATCH 09/31] changed the configuration factory --- .../communityconfiguration/tagging_conf.xml | 127 ++++++++++-------- 1 file changed, 74 insertions(+), 53 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml index ec52d4799..a44372e4d 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf.xml @@ -1,7 +1,7 @@ - + zenodo @@ -9,29 +9,33 @@ - Result: 2 + + - + - Result: 3 + + - + - Result: 4 + + - + re3data_____::a507cdacc5bbcc08761c92185dee5cab - + - Result: 5 + + - + rda @@ -39,7 +43,8 @@ - Result: 6 + + SDG13 - Climate action SDG8 - Decent work and economic growth @@ -59,10 +64,11 @@ SDG9 - Industry innovation and infrastructure SDG16 - Peace justice and strong institutions - + - Result: 7 
+ + modern art monuments @@ -132,7 +138,7 @@ architectural vessels - + re3data_____::9ebe127e5f3a0bf401875690f3bb6b81 @@ -169,7 +175,7 @@ opendoar____::97275a23ca44226c9964043c8462be96 - + storm @@ -253,7 +259,8 @@ - Result: 8 + + Stock Assessment pelagic @@ -275,7 +282,7 @@ Fishing fleet Aquaculture - + doajarticles::8cec81178926caaca531afbd8eb5d64c @@ -328,7 +335,7 @@ doajarticles::dd70e44479f0ade25aa106aef3e87a0a - + discardless @@ -372,7 +379,8 @@ - Result: 9 + + brain mapping brain imaging @@ -393,7 +401,7 @@ brain magnetic resonance imaging brain abnormalities - + re3data_____::5b9bf9171d92df854cf3c520692e9122 @@ -418,7 +426,7 @@ rest________::fb1a3d4523c95e63496e3bc7ba36244b - + neuroinformatics @@ -486,7 +494,8 @@ - Result: 10 + + marine ocean @@ -494,7 +503,7 @@ aqua sea - + adriplan @@ -686,9 +695,10 @@ - Result: 11 + + - + instruct @@ -700,12 +710,14 @@ - Result: 12 + + - + - Result: 13 + + animal production and health fisheries and aquaculture @@ -726,7 +738,7 @@ food distribution forestry - + opendoar____::1a551829d50f1400b0dab21fdd969c04 @@ -751,7 +763,7 @@ opendoar____::87ae6fb631f7c8a627e8e28785d9992d - + edenis @@ -827,9 +839,10 @@ - Result: 14 + + - + opendoar____::7e7757b1e12abcb736ab9a754ffb617a {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} @@ -838,7 +851,7 @@ opendoar____::96da2f590cd7246bbde0051047b0d6f7 {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} - + dimpo @@ -846,17 +859,20 @@ - Result: 15 + + - + - Result: 16 + + - + - Result: 17 + + Green Transport City mobility systems @@ -1009,7 +1025,7 @@ Innovative freight delivery systems urban freight delivery infrastructures - + doajarticles::1c5bdf8fca58937894ad1441cca99b76 @@ -1094,7 +1110,7 @@ doajarticles::fba6191177ede7c51ea1cdf58eae7f8b - + jsdtl @@ -1154,22 +1170,26 @@ - Result: 18 + + - + - Result: 19 + + - + - Result: 20 + + - + - Result: 21 + + Sustainability-oriented science policy STI policies @@ 
-1179,7 +1199,7 @@ science policy Policy and Law - + doajarticles::c6f0ed5fa41e98863e7c73501fe4bd6d @@ -1264,7 +1284,7 @@ doajarticles::7ffc35ac5133da01d421ccf8af5b70bc - + risis @@ -1272,7 +1292,8 @@ - Result: 22 + + COVID-19 Severe acute respiratory syndrome coronavirus 2 @@ -1287,7 +1308,7 @@ mesh:COVID-19 COVID2019 - + opendoar____::358aee4cc897452c00244351e4d91f69 {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]}, @@ -1366,7 +1387,7 @@ re3data_____::978378def740bbf2bfb420de868c460b {"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCov"}]}]} - + chicago-covid-19 @@ -1387,4 +1408,4 @@ - + \ No newline at end of file From 1547ca7e15a7353476b794ec51e3d53791f2d66c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 12 May 2020 12:17:27 +0200 Subject: [PATCH 10/31] added blacklist step to the end of the provision wf --- .../eu/dnetlib/dhp/wf/profiles/provision.xml | 130 +++++++----------- 1 file changed, 52 insertions(+), 78 deletions(-) diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml index f99ea7aed..7c918a0d7 100644 --- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml @@ -4,7 +4,7 @@ - + Data Provision [OCEAN] @@ -131,6 +131,16 @@ + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/beta_provision/graph/12_graph_blacklisted + + + + + Set the lookup address @@ -155,64 +165,8 @@ Set the map of associations organization, community list for the propagation of community to 
result through organization propagationOrganizationCommunityMap - - { - "20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], - "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], - "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"], - "20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"], - "20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"], - "20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"], - "20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"], - "20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"], - "20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"], - "20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"], - "20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"], - "20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"], - "20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"], - "20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"], - "20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"], - "20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"], - "20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"], - "20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"], - "20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"], - "20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"], - "20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"], - "20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"], - "20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], - "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], - "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], - 
"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], - "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], - "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], - "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], - "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], - "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], - "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], - "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], - "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], - "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], - "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], - "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], - "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], - "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], - "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], - "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], - "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], - "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], - "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], - "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], - "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], - "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], - "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], - "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], - "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], - "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], - "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], - "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], - "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], - 
"20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"] - } + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], 
"20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} @@ -273,8 +227,8 @@ 'mongoDb' : 'mdstore', 'postgresURL' : 
'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : 'dnet', - 'postgresPassword' : '*****', - 'reuseContent' : 'false', + 'postgresPassword' : 'dnetPwd', + 'reuseContent' : 'true', 'contentPath' : '/tmp/beta_provision/aggregator', 'workingDir' : '/tmp/beta_provision/working_dir/aggregator' } @@ -403,7 +357,6 @@ - propagates ORCID among results linked by allowedsemrels semantic relationships @@ -429,7 +382,6 @@ - mark results respecting some rules as belonging to communities @@ -440,7 +392,7 @@ 'sourcePath' : 'orcidGraphPath', 'outputPath': 'bulkTaggingGraphPath', 'isLookUpUrl' : 'isLookUpUrl', - 'pathMap' : 'bulkTaggingPathMap', + 'pathMap' : 'bulkTaggingPathMap' } @@ -455,7 +407,6 @@ - creates relashionships between results and organizations when the organizations are associated to institutional repositories @@ -464,14 +415,14 @@ { 'sourcePath' : 'bulkTaggingGraphPath', - 'outputPath': 'affiliationGraphPath', - 'saveGraph' : 'true' + 'outputPath': 'affiliationGraphPath' } { 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/affiliation' + 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', + 'saveGraph' : 'true' } build-report @@ -480,9 +431,8 @@ - - marks as belonging to communities the result collected from providers related to the organizations specified in the organizationCommunityMap + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap executeOozieJob IIS @@ -506,7 +456,6 @@ - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects @@ -532,7 +481,6 @@ - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities @@ -542,14 +490,15 @@ { 'sourcePath' : 'fundingGraphPath', 'outputPath': 'communitySemRelGraphPath', - 
'isLookupUrl' : 'isLookUpUrl' + 'isLookUpUrl' : 'isLookUpUrl' } { 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo' + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' } build-report @@ -558,7 +507,6 @@ - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from @@ -581,16 +529,42 @@ build-report + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'countryGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', + 'postgresUser' : 'dnet', + 'postgresPassword' : 'dnetPwd' + } + + build-report + - wf_20200428_155848_495 - 2020-04-28T16:53:23+00:00 + wf_20200509_100941_857 + 2020-05-09T13:26:09+00:00 FAILURE - + eu.dnetlib.data.hadoop.rmi.HadoopServiceException: hadoop job: 0002933-200403132837156-oozie-oozi-W failed with status: KILLED, oozie log: 2020-05-09 13:23:31,194 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No results found 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] Start action [0002933-200403132837156-oozie-oozi-W@:start:] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval 
[10] 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] [***0002933-200403132837156-oozie-oozi-W@:start:***]Action status=DONE 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] [***0002933-200403132837156-oozie-oozi-W@:start:***]Action updated in DB! 2020-05-09 13:23:31,257 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] No results found 2020-05-09 13:23:31,275 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@:start: 2020-05-09 13:23:31,275 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W 2020-05-09 13:23:31,314 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] Start action [0002933-200403132837156-oozie-oozi-W@reset-outputpath] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:33,897 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] [***0002933-200403132837156-oozie-oozi-W@reset-outputpath***]Action status=DONE 2020-05-09 13:23:33,897 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] [***0002933-200403132837156-oozie-oozi-W@reset-outputpath***]Action updated in DB! 
2020-05-09 13:23:33,947 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] No results found 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] Start action [0002933-200403132837156-oozie-oozi-W@copy_entities] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] [***0002933-200403132837156-oozie-oozi-W@copy_entities***]Action status=DONE 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] [***0002933-200403132837156-oozie-oozi-W@copy_entities***]Action updated in DB! 
2020-05-09 13:23:34,012 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,018 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,023 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,029 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,124 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] Start action [0002933-200403132837156-oozie-oozi-W@copy_relation] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,130 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] Start action 
[0002933-200403132837156-oozie-oozi-W@copy_projects] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,130 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] Start action [0002933-200403132837156-oozie-oozi-W@copy_datasources] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,140 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] Start action [0002933-200403132837156-oozie-oozi-W@copy_organization] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:35,010 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] checking action, hadoop job ID [job_1585920557248_14569] status [RUNNING] 2020-05-09 13:23:35,018 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] [***0002933-200403132837156-oozie-oozi-W@copy_projects***]Action status=RUNNING 2020-05-09 13:23:35,018 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] 
APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] [***0002933-200403132837156-oozie-oozi-W@copy_projects***]Action updated in DB! 2020-05-09 13:23:35,022 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] checking action, hadoop job ID [job_1585920557248_14568] status [RUNNING] 2020-05-09 13:23:35,027 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_projects 2020-05-09 13:23:35,028 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] [***0002933-200403132837156-oozie-oozi-W@copy_relation***]Action status=RUNNING 2020-05-09 13:23:35,028 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] [***0002933-200403132837156-oozie-oozi-W@copy_relation***]Action updated in DB! 
2020-05-09 13:23:35,031 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] checking action, hadoop job ID [job_1585920557248_14570] status [RUNNING] 2020-05-09 13:23:35,035 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] [***0002933-200403132837156-oozie-oozi-W@copy_datasources***]Action status=RUNNING 2020-05-09 13:23:35,035 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] [***0002933-200403132837156-oozie-oozi-W@copy_datasources***]Action updated in DB! 2020-05-09 13:23:35,037 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_relation 2020-05-09 13:23:35,048 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_datasources 2020-05-09 13:23:35,072 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] checking action, hadoop job ID [job_1585920557248_14571] status [RUNNING] 2020-05-09 13:23:35,076 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] [***0002933-200403132837156-oozie-oozi-W@copy_organization***]Action status=RUNNING 2020-05-09 13:23:35,076 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] [***0002933-200403132837156-oozie-oozi-W@copy_organization***]Action updated in DB! 2020-05-09 13:23:35,084 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_organization 2020-05-09 13:23:35,090 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_entities 2020-05-09 13:23:35,090 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@reset-outputpath 2020-05-09 13:23:58,926 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] callback for action [0002933-200403132837156-oozie-oozi-W@copy_datasources] 2020-05-09 13:23:59,085 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] checking action, hadoop job ID [job_1585920557248_14570] status [RUNNING] 2020-05-09 13:23:59,242 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] callback for action [0002933-200403132837156-oozie-oozi-W@copy_projects] 2020-05-09 13:23:59,386 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] checking action, hadoop job ID [job_1585920557248_14569] status [RUNNING] 2020-05-09 13:24:01,343 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] 
GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] callback for action [0002933-200403132837156-oozie-oozi-W@copy_datasources] 2020-05-09 13:24:01,418 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] Hadoop Jobs launched : [job_1585920557248_14573] 2020-05-09 13:24:01,418 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] action completed, external ID [job_1585920557248_14570] 2020-05-09 13:24:01,493 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_datasources 2020-05-09 13:24:01,935 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] callback for action [0002933-200403132837156-oozie-oozi-W@copy_projects] 2020-05-09 13:24:02,012 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] Hadoop Jobs launched : [job_1585920557248_14572] 2020-05-09 13:24:02,012 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] action completed, external ID [job_1585920557248_14569] 2020-05-09 13:24:02,076 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_projects 2020-05-09 13:25:03,172 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] callback for action [0002933-200403132837156-oozie-oozi-W@copy_organization] 2020-05-09 13:25:03,336 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] checking action, hadoop job ID [job_1585920557248_14571] status [RUNNING] 2020-05-09 13:25:05,598 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] callback for action [0002933-200403132837156-oozie-oozi-W@copy_organization] 2020-05-09 13:25:05,688 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] Hadoop Jobs launched : [job_1585920557248_14574] 2020-05-09 13:25:05,691 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] action completed, external ID [job_1585920557248_14571] 2020-05-09 13:25:05,748 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] 
APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_organization 2020-05-09 13:25:23,274 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] callback for action [0002933-200403132837156-oozie-oozi-W@copy_relation] 2020-05-09 13:25:23,409 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] checking action, hadoop job ID [job_1585920557248_14568] status [RUNNING] 2020-05-09 13:25:25,419 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] callback for action [0002933-200403132837156-oozie-oozi-W@copy_relation] 2020-05-09 13:25:25,510 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] Hadoop Jobs launched : [job_1585920557248_14575] 2020-05-09 13:25:25,511 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] action completed, external ID [job_1585920557248_14568] 2020-05-09 13:25:25,565 INFO 
org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No results found 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] Start action [0002933-200403132837156-oozie-oozi-W@copy_wait] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] [***0002933-200403132837156-oozie-oozi-W@copy_wait***]Action status=DONE 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] [***0002933-200403132837156-oozie-oozi-W@copy_wait***]Action updated in DB! 
2020-05-09 13:25:25,627 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] No results found 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] Start action [0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] [***0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1***]Action status=DONE 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] [***0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1***]Action updated in DB! 
2020-05-09 13:25:25,694 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,700 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,706 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,711 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,801 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,825 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] 
ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_software] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,825 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,828 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:27,165 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] checking action, hadoop job ID [job_1585920557248_14578] status [RUNNING] 2020-05-09 13:25:27,170 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] 
[***0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct***]Action status=RUNNING 2020-05-09 13:25:27,170 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] [***0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct***]Action updated in DB! 2020-05-09 13:25:27,179 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] checking action, hadoop job ID [job_1585920557248_14577] status [RUNNING] 2020-05-09 13:25:27,181 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct 2020-05-09 13:25:27,183 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] [***0002933-200403132837156-oozie-oozi-W@join_prepare_software***]Action status=RUNNING 2020-05-09 13:25:27,183 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] [***0002933-200403132837156-oozie-oozi-W@join_prepare_software***]Action updated in DB! 2020-05-09 13:25:27,188 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_software 2020-05-09 13:25:27,617 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] checking action, hadoop job ID [job_1585920557248_14576] status [RUNNING] 2020-05-09 13:25:27,622 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] [***0002933-200403132837156-oozie-oozi-W@join_prepare_publication***]Action status=RUNNING 2020-05-09 13:25:27,622 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] [***0002933-200403132837156-oozie-oozi-W@join_prepare_publication***]Action updated in DB! 2020-05-09 13:25:27,625 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] checking action, hadoop job ID [job_1585920557248_14579] status [RUNNING] 2020-05-09 13:25:27,628 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_publication 2020-05-09 13:25:27,629 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] [***0002933-200403132837156-oozie-oozi-W@join_prepare_dataset***]Action status=RUNNING 2020-05-09 13:25:27,629 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] [***0002933-200403132837156-oozie-oozi-W@join_prepare_dataset***]Action updated in DB! 2020-05-09 13:25:27,634 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_dataset 2020-05-09 13:25:27,639 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1 2020-05-09 13:25:27,639 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_wait 2020-05-09 13:25:27,640 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_relation 2020-05-09 13:25:41,416 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_software] 2020-05-09 13:25:41,490 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] action completed, external ID [job_1585920557248_14577] 2020-05-09 13:25:41,495 WARN org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Launcher ERROR, reason: Main class [org.apache.oozie.action.hadoop.SparkMain], main() threw exception, File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist 2020-05-09 13:25:41,495 WARN org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] 
JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Launcher exception: File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist java.io.FileNotFoundException: File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:598) at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:811) at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:588) at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:432) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:340) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:292) at org.apache.spark.deploy.yarn.Client.copyFileToRemote(Client.scala:404) at org.apache.spark.deploy.yarn.Client.org$apache$spark$deploy$yarn$Client$$distribute$1(Client.scala:496) at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$9.apply(Client.scala:595) at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$9.apply(Client.scala:594) at scala.Option.foreach(Option.scala:257) at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:594) at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:886) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:180) at org.apache.spark.deploy.yarn.Client.run(Client.scala:1156) at org.apache.spark.deploy.yarn.YarnClusterApplication.start(Client.scala:1608) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167) at 
org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) at org.apache.oozie.action.hadoop.SparkMain.runSpark(SparkMain.java:178) at org.apache.oozie.action.hadoop.SparkMain.run(SparkMain.java:90) at org.apache.oozie.action.hadoop.LauncherMain.run(LauncherMain.java:81) at org.apache.oozie.action.hadoop.SparkMain.main(SparkMain.java:57) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.oozie.action.hadoop.LauncherMapper.map(LauncherMapper.java:235) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:459) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 2020-05-09 13:25:41,514 INFO org.apache.oozie.command.wf.ActionEndXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] ERROR is considered as FAILED for SLA 2020-05-09 13:25:41,541 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] 
APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No results found 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] Start action [0002933-200403132837156-oozie-oozi-W@Kill] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] [***0002933-200403132837156-oozie-oozi-W@Kill***]Action status=DONE 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] [***0002933-200403132837156-oozie-oozi-W@Kill***]Action updated in DB! 2020-05-09 13:25:41,692 WARN org.apache.oozie.workflow.lite.LiteWorkflowInstance: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] Workflow completed [KILLED], killing [3] running nodes 2020-05-09 13:25:41,760 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@Kill 2020-05-09 13:25:41,766 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_software 2020-05-09 13:25:41,852 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct 2020-05-09 13:25:41,914 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] 2020-05-09 13:25:41,920 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at 
org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 2020-05-09 13:25:41,938 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_publication 2020-05-09 13:25:42,005 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] 2020-05-09 13:25:42,010 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
at java.lang.Thread.run(Thread.java:745) 2020-05-09 13:25:42,028 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W 2020-05-09 13:25:42,028 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_dataset 2020-05-09 13:25:42,113 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] 2020-05-09 13:25:42,116 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) \ No newline at end of file From ec0782e582961bea41f8fa30ac43a3d4a3a366fa Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 May 2020 15:49:28 +0200 Subject: [PATCH 11/31] renamed jar containing the bulktagging and propagation workflows from dhp-[bulktagging|propagation] to dhp-enrichment; adjusted xml formatting --- .../dhp/bulktag/oozie_app/workflow.xml | 8 +- .../countrypropagation/oozie_app/workflow.xml | 18 +- .../oozie_app/workflow.xml | 160 +++++++----------- .../projecttoresult/oozie_app/workflow.xml | 107 ++++++------ .../oozie_app/workflow.xml | 10 +- .../oozie_app/workflow.xml | 21 ++- .../oozie_app/workflow.xml | 10 +- 7 files changed, 144 insertions(+), 190 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml index 524281bc9..754aba4f2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml @@ -101,7 +101,7 @@ cluster bulkTagging-publication eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-bulktag-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --num-executors=${sparkExecutorNumber} --executor-memory=${sparkExecutorMemory} @@ -130,7 +130,7 @@ cluster bulkTagging-dataset eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-bulktag-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --num-executors=${sparkExecutorNumber} --executor-memory=${sparkExecutorMemory} @@ -159,7 +159,7 @@ cluster bulkTagging-orp eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-bulktag-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar 
--num-executors=${sparkExecutorNumber} --executor-memory=${sparkExecutorMemory} @@ -188,7 +188,7 @@ cluster bulkTagging-software eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-bulktag-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --num-executors=${sparkExecutorNumber} --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml index f269c5442..fc877071d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml @@ -92,7 +92,7 @@ cluster PrepareDatasourceCountryAssociation eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -126,7 +126,7 @@ cluster prepareResultCountry-Publication eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -156,7 +156,7 @@ cluster prepareResultCountry-Dataset eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -186,7 +186,7 @@ cluster prepareResultCountry-ORP eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -216,7 +216,7 @@ cluster prepareResultCountry-Software 
eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -255,7 +255,7 @@ cluster countryPropagationForPublications eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -285,7 +285,7 @@ cluster countryPropagationForDataset eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -315,7 +315,7 @@ cluster countryPropagationForORP eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -345,7 +345,7 @@ cluster countryPropagationForSoftware eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml index 7b06b6504..e4429b710 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -95,7 +95,7 @@ cluster ORCIDPropagation-PreparePhase1-Publications eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - 
dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -111,16 +111,11 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --sourcePath - ${sourcePath} - --hive_metastore_uris - ${hive_metastore_uris} - --resultTableName - eu.dnetlib.dhp.schema.oaf.Publication - --outputPath - ${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels - ${allowedsemrels} + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} @@ -132,7 +127,7 @@ cluster ORCIDPropagation-PreparePhase1-Dataset eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -144,16 +139,11 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath - ${sourcePath} - --hive_metastore_uris - ${hive_metastore_uris} - --resultTableName - eu.dnetlib.dhp.schema.oaf.Dataset - --outputPath - ${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels - ${allowedsemrels} + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} @@ -165,7 +155,7 @@ cluster ORCIDPropagation-PreparePhase1-ORP eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -177,16 +167,11 @@ --conf 
spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath - ${sourcePath} - --hive_metastore_uris - ${hive_metastore_uris} - --resultTableName - eu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath - ${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels - ${allowedsemrels} + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} @@ -198,7 +183,7 @@ cluster ORCIDPropagation-PreparePhase1-Software eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -210,16 +195,11 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath - ${sourcePath} - --hive_metastore_uris - ${hive_metastore_uris} - --resultTableName - eu.dnetlib.dhp.schema.oaf.Software - --outputPath - ${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels - ${allowedsemrels} + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} @@ -233,7 +213,7 @@ cluster ORCIDPropagation-PreparePhase2 eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -245,13 +225,10 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath - ${workingDir}/preparedInfo/targetOrcidAssoc - --outputPath - 
${workingDir}/preparedInfo/mergedOrcidAssoc + --sourcePath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/preparedInfo/mergedOrcidAssoc - @@ -268,7 +245,7 @@ cluster ORCIDPropagation-Publication eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -284,18 +261,12 @@ --conf spark.hadoop.mapreduce.reduce.speculative=false --conf spark.sql.shuffle.partitions=3840 - --possibleUpdatesPath - ${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath - ${sourcePath}/publication - --hive_metastore_uris - ${hive_metastore_uris} - --resultTableName - eu.dnetlib.dhp.schema.oaf.Publication - --outputPath - ${outputPath}/publication - --saveGraph - ${saveGraph} + --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc + --sourcePath${sourcePath}/publication + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${outputPath}/publication + --saveGraph${saveGraph} @@ -306,7 +277,7 @@ cluster ORCIDPropagation-Dataset eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -321,18 +292,12 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath - ${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath - ${sourcePath}/dataset - --hive_metastore_uris - ${hive_metastore_uris} - --resultTableName - eu.dnetlib.dhp.schema.oaf.Dataset - --outputPath - ${outputPath}/dataset - --saveGraph - ${saveGraph} + --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc + --sourcePath${sourcePath}/dataset + --hive_metastore_uris${hive_metastore_uris} + 
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${outputPath}/dataset + --saveGraph${saveGraph} @@ -343,7 +308,7 @@ cluster ORCIDPropagation-ORP eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -358,18 +323,12 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath - ${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath - ${sourcePath}/otherresearchproduct - --hive_metastore_uris - ${hive_metastore_uris} - --resultTableName - eu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath - ${outputPath}/otherresearchproduct - --saveGraph - ${saveGraph} + --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc + --sourcePath${sourcePath}/otherresearchproduct + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${outputPath}/otherresearchproduct + --saveGraph${saveGraph} @@ -380,7 +339,7 @@ cluster ORCIDPropagation-Software eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -395,22 +354,19 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath - ${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath - ${sourcePath}/software - --hive_metastore_uris - ${hive_metastore_uris} - --resultTableName - eu.dnetlib.dhp.schema.oaf.Software - --outputPath - ${outputPath}/software - --saveGraph - ${saveGraph} + --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc + --sourcePath${sourcePath}/software + --hive_metastore_uris${hive_metastore_uris} + 
--resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${outputPath}/software + --saveGraph${saveGraph} + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml index dd7f25846..687d66869 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml @@ -127,61 +127,60 @@ + + + + yarn + cluster + PrepareProjectResultsAssociation + eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation + dhp-propagation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/relation + --allowedsemrels${allowedsemrels} + --hive_metastore_uris${hive_metastore_uris} + --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates + --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + + + + - - - - yarn - cluster - PrepareProjectResultsAssociation - eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation - dhp-propagation-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/relation - --allowedsemrels${allowedsemrels} - --hive_metastore_uris${hive_metastore_uris} - --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - - - - - - - - yarn - cluster - ProjectToResultPropagation - eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob - dhp-propagation-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --saveGraph${saveGraph} - --hive_metastore_uris${hive_metastore_uris} - --outputPath${outputPath}/relation - --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - - - - + + + yarn + cluster + ProjectToResultPropagation + eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --saveGraph${saveGraph} + --hive_metastore_uris${hive_metastore_uris} + 
--outputPath${outputPath}/relation + --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates + --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml index 3be69bde6..d481cad05 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -88,7 +88,7 @@ cluster Prepare-Community-Result-Organization eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -122,7 +122,7 @@ cluster community2resultfromorganization-Publication eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -151,7 +151,7 @@ cluster community2resultfromorganization-Dataset eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -180,7 +180,7 @@ cluster community2resultfromorganization-ORP eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -209,7 +209,7 @@ cluster 
community2resultfromorganization-Software eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml index b75b2d31e..81b51443c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -99,7 +99,7 @@ cluster ResultToCommunitySemRel-PreparePhase1-Publications eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -128,7 +128,7 @@ cluster ResultToCommunitySemRel-PreparePhase1-Dataset eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -150,13 +150,14 @@ + yarn cluster ResultToCommunitySemRel-PreparePhase1-ORP eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -185,7 +186,7 @@ cluster ResultToCommunitySemRel-PreparePhase1-Software eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar 
--executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -216,7 +217,7 @@ cluster ResultToCommunityEmRelPropagation-PreparePhase2 eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2 - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -232,9 +233,7 @@ --outputPath${workingDir}/preparedInfo/mergedCommunityAssoc - - @@ -250,7 +249,7 @@ cluster Result2CommunitySemRelPropagation-Publication eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -279,7 +278,7 @@ cluster Result2CommunitySemRelPropagation-Dataset eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -308,7 +307,7 @@ cluster Result2CommunitySemRelPropagation-ORP eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -337,7 +336,7 @@ cluster Result2CommunitySemRelPropagation-Software eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index 73268fcc7..a1b7f4ad7 
100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -131,7 +131,7 @@ cluster PrepareResultOrganizationAssociation eu.dnetlib.dhp.resulttoorganizationfrominstrepo.PrepareResultInstRepoAssociation - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -163,7 +163,7 @@ cluster resultToOrganizationFromInstRepoPropagationForPublications eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -193,7 +193,7 @@ cluster resultToOrganizationFromInstRepoPropagationForDataset eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -223,7 +223,7 @@ cluster resultToOrganizationFromInstRepoPropagationForORP eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} @@ -253,7 +253,7 @@ cluster resultToOrganizationFromInstRepoPropagationForSoftware eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} From 43f127448ddf38d2a2328585ff4d2b1562bd0ff5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 12 May 2020 
18:24:26 +0200 Subject: [PATCH 12/31] changed the package name from dhp-propagation to dhp-enrichment for the preparation phase of funding propagation --- .../eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml index 687d66869..24e1d3b7f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml @@ -134,7 +134,7 @@ cluster PrepareProjectResultsAssociation eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation - dhp-propagation-${projectVersion}.jar + dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} --executor-memory=${sparkExecutorMemory} From 85f3c55992b9d9b526020aa096e9867774b92270 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 13 May 2020 09:04:33 +0200 Subject: [PATCH 13/31] fixed node names in blacklist workflow --- .../eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml index 59fd30fea..1538318c1 100644 --- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + postgresURL @@ -102,7 +102,7 @@ - + ${jobTracker} ${nameNode} @@ -113,7 +113,7 @@ - + ${jobTracker} ${nameNode} From c0265213a0b2119a30f00ec0013c6c88de3b826a Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: 
Wed, 13 May 2020 12:00:27 +0200 Subject: [PATCH 14/31] partial implementation --- .../dhp/broker/model/EventFactory.java | 31 ++++----- .../eu/dnetlib/dhp/broker/model/Topic.java | 52 +++++++++++++++ .../broker/oa/GenerateEventsApplication.java | 55 +++++++--------- .../broker/oa/util/EnrichMissingAbstract.java | 30 +++++---- .../oa/util/EnrichMissingAuthorOrcid.java | 32 +++++----- .../oa/util/EnrichMissingOpenAccess.java | 31 ++++----- .../dhp/broker/oa/util/EnrichMissingPid.java | 28 ++++----- .../broker/oa/util/EnrichMissingProject.java | 30 ++++----- .../oa/util/EnrichMissingPublicationDate.java | 29 ++++----- .../broker/oa/util/EnrichMissingSubject.java | 30 +++++---- .../broker/oa/util/EnrichMoreOpenAccess.java | 29 ++++----- .../dhp/broker/oa/util/EnrichMorePid.java | 28 ++++----- .../dhp/broker/oa/util/EnrichMoreSubject.java | 30 +++++---- .../dhp/broker/oa/util/UpdateInfo.java | 59 ++++++++++++++--- .../dhp/broker/oa/util/UpdateMatcher.java | 63 +++++++++++++++++++ 15 files changed, 363 insertions(+), 194 deletions(-) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateMatcher.java diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index 0694556b2..9e5d98644 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -29,31 +29,32 @@ public class EventFactory { "yyyy-MM-dd" }; - public static Event newBrokerEvent(final Result source, final Result target, final UpdateInfo updateInfo) { + public static Event newBrokerEvent(final UpdateInfo updateInfo) { final long now = new Date().getTime(); final Event res = new 
Event(); - final Map map = createMapFromResult(target, source, updateInfo); + final Map map = createMapFromResult(updateInfo); - final String payload = createPayload(target, updateInfo); + final String payload = createPayload(updateInfo); final String eventId = calculateEventId( - updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString()); + updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0), + updateInfo.getHighlightValueAsString()); res.setEventId(eventId); res.setProducerId(PRODUCER_ID); res.setPayload(payload); res.setMap(map); - res.setTopic(updateInfo.getTopic()); + res.setTopic(updateInfo.getTopicPath()); res.setCreationDate(now); res.setExpiryDate(calculateExpiryDate(now)); res.setInstantMessage(false); return res; } - private static String createPayload(final Result result, final UpdateInfo updateInfo) { + private static String createPayload(final UpdateInfo updateInfo) { final OpenAireEventPayload payload = new OpenAireEventPayload(); // TODO @@ -62,32 +63,34 @@ public class EventFactory { return payload.toJSON(); } - private static Map createMapFromResult(final Result oaf, final Result source, - final UpdateInfo updateInfo) { + private static Map createMapFromResult(final UpdateInfo updateInfo) { final Map map = new HashMap<>(); - final List collectedFrom = oaf.getCollectedfrom(); + final Result source = updateInfo.getSource(); + final Result target = updateInfo.getTarget(); + + final List collectedFrom = target.getCollectedfrom(); if (collectedFrom.size() == 1) { map.put("target_datasource_id", collectedFrom.get(0).getKey()); map.put("target_datasource_name", collectedFrom.get(0).getValue()); } - final List ids = oaf.getOriginalId(); + final List ids = target.getOriginalId(); if (ids.size() > 0) { map.put("target_publication_id", ids.get(0)); } - final List titles = oaf.getTitle(); + final List titles = target.getTitle(); if (titles.size() > 0) { map.put("target_publication_title", titles.get(0)); 
} - final long date = parseDateTolong(oaf.getDateofacceptance().getValue()); + final long date = parseDateTolong(target.getDateofacceptance().getValue()); if (date > 0) { map.put("target_dateofacceptance", date); } - final List subjects = oaf.getSubject(); + final List subjects = target.getSubject(); if (subjects.size() > 0) { map .put( @@ -95,7 +98,7 @@ public class EventFactory { subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); } - final List authors = oaf.getAuthor(); + final List authors = target.getAuthor(); if (authors.size() > 0) { map .put( diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java new file mode 100644 index 000000000..29f6cbe3a --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Topic.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.broker.model; + +public enum Topic { + + // ENRICHMENT MISSING + ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), ENRICH_MISSING_ABSTRACT( + "ENRICH/MISSING/ABSTRACT"), ENRICH_MISSING_PUBLICATION_DATE( + "ENRICH/MISSING/PUBLICATION_DATE"), ENRICH_MISSING_PID( + "ENRICH/MISSING/PID"), ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), ENRICH_MISSING_SOFTWARE( + "ENRICH/MISSING/SOFTWARE"), ENRICH_MISSING_SUBJECT_MESHEUROPMC( + "ENRICH/MISSING/SUBJECT/MESHEUROPMC"), ENRICH_MISSING_SUBJECT_ARXIV( + "ENRICH/MISSING/SUBJECT/ARXIV"), ENRICH_MISSING_SUBJECT_JEL( + "ENRICH/MISSING/SUBJECT/JEL"), ENRICH_MISSING_SUBJECT_DDC( + "ENRICH/MISSING/SUBJECT/DDC"), ENRICH_MISSING_SUBJECT_ACM( + "ENRICH/MISSING/SUBJECT/ACM"), ENRICH_MISSING_SUBJECT_RVK( + "ENRICH/MISSING/SUBJECT/RVK"), ENRICH_MISSING_AUTHOR_ORCID( + "ENRICH/MISSING/AUTHOR/ORCID"), + + // ENRICHMENT MORE + ENRICH_MORE_PID("ENRICH/MORE/PID"), ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), ENRICH_MORE_ABSTRACT( + "ENRICH/MORE/ABSTRACT"), 
ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), ENRICH_MORE_PROJECT( + "ENRICH/MORE/PROJECT"), ENRICH_MORE_SUBJECT_MESHEUROPMC( + "ENRICH/MORE/SUBJECT/MESHEUROPMC"), ENRICH_MORE_SUBJECT_ARXIV( + "ENRICH/MORE/SUBJECT/ARXIV"), ENRICH_MORE_SUBJECT_JEL( + "ENRICH/MORE/SUBJECT/JEL"), ENRICH_MORE_SUBJECT_DDC( + "ENRICH/MORE/SUBJECT/DDC"), ENRICH_MORE_SUBJECT_ACM( + "ENRICH/MORE/SUBJECT/ACM"), ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"), + + // ADDITION + ADD_BY_PROJECT("ADD/BY_PROJECT"); + + Topic(final String path) { + this.path = path; + } + + protected String path; + + public String getPath() { + return this.path; + } + + public static Topic fromPath(final String path) { + for (final Topic t : Topic.values()) { + if (t.getPath().equals(path)) { + return t; + } + } + return null; + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 54d4ef36a..c4c167c13 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -14,8 +14,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.EventFactory; @@ -30,6 +28,7 @@ import eu.dnetlib.dhp.broker.oa.util.EnrichMoreOpenAccess; import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid; import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.broker.oa.util.UpdateMatcher; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.oaf.Result; @@ -37,7 +36,16 @@ 
public class GenerateEventsApplication { private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final UpdateMatcher enrichMissingAbstract = new EnrichMissingAbstract(); + private static final UpdateMatcher enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid(); + private static final UpdateMatcher enrichMissingOpenAccess = new EnrichMissingOpenAccess(); + private static final UpdateMatcher enrichMissingPid = new EnrichMissingPid(); + private static final UpdateMatcher enrichMissingProject = new EnrichMissingProject(); + private static final UpdateMatcher enrichMissingPublicationDate = new EnrichMissingPublicationDate(); + private static final UpdateMatcher enrichMissingSubject = new EnrichMissingSubject(); + private static final UpdateMatcher enrichMoreOpenAccess = new EnrichMoreOpenAccess(); + private static final UpdateMatcher enrichMorePid = new EnrichMorePid(); + private static final UpdateMatcher enrichMoreSubject = new EnrichMoreSubject(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -76,37 +84,22 @@ public class GenerateEventsApplication { } private List generateEvents(final Result... 
children) { - final List list = new ArrayList<>(); + final List> list = new ArrayList<>(); - for (final Result source : children) { - for (final Result target : children) { - if (source != target) { - list - .addAll( - findUpdates(source, target) - .stream() - .map(info -> EventFactory.newBrokerEvent(source, target, info)) - .collect(Collectors.toList())); - } - } + for (final Result target : children) { + list.addAll(enrichMissingAbstract.searchUpdatesForRecord(target, children)); + list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children)); + list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children)); + list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children)); + list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children)); + list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children)); + list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children)); + list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children)); + list.addAll(enrichMorePid.searchUpdatesForRecord(target, children)); + list.addAll(enrichMoreSubject.searchUpdatesForRecord(target, children)); } - return list; - } - - private List> findUpdates(final Result source, final Result target) { - final List> list = new ArrayList<>(); - list.addAll(EnrichMissingAbstract.findUpdates(source, target)); - list.addAll(EnrichMissingAuthorOrcid.findUpdates(source, target)); - list.addAll(EnrichMissingOpenAccess.findUpdates(source, target)); - list.addAll(EnrichMissingPid.findUpdates(source, target)); - list.addAll(EnrichMissingProject.findUpdates(source, target)); - list.addAll(EnrichMissingPublicationDate.findUpdates(source, target)); - list.addAll(EnrichMissingSubject.findUpdates(source, target)); - list.addAll(EnrichMoreOpenAccess.findUpdates(source, target)); - list.addAll(EnrichMorePid.findUpdates(source, target)); - list.addAll(EnrichMoreSubject.findUpdates(source, target)); - return list; + 
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java index 493d1f97c..6b6e35d1d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java @@ -1,31 +1,35 @@ package eu.dnetlib.dhp.broker.oa.util; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAbstract extends UpdateInfo { +public class EnrichMissingAbstract extends UpdateMatcher { - public static List findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); - } - - private EnrichMissingAbstract(final String highlightValue, final float trust) { - super("ENRICH/MISSING/ABSTRACT", highlightValue, trust); + public EnrichMissingAbstract() { + super(false); } @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().getAbstracts().add(getHighlightValue()); + protected List> findUpdates(final Result source, final Result target) { + if (isMissing(target.getDescription()) && !isMissing(source.getDescription())) { + return Arrays.asList(generateUpdateInfo(source.getDescription().get(0).getValue(), source, target)); + } + return new ArrayList<>(); } @Override - public String getHighlightValueAsString() { - return getHighlightValue(); + public UpdateInfo generateUpdateInfo(final String highlightValue, final Result source, + final Result target) { + return new UpdateInfo<>( + 
Topic.ENRICH_MISSING_ABSTRACT, + highlightValue, source, target, + (p, s) -> p.getAbstracts().add(s), + s -> s); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java index 6899c62a3..d81427e05 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java @@ -4,28 +4,30 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.OpenAireEventPayload; +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingAuthorOrcid extends UpdateInfo { +public class EnrichMissingAuthorOrcid extends UpdateMatcher> { - public static List findUpdates(final Result source, final Result target) { + public EnrichMissingAuthorOrcid() { + super(true); + } + + @Override + protected List>> findUpdates(final Result source, final Result target) { // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); } - private EnrichMissingAuthorOrcid(final String highlightValue, final float trust) { - super("ENRICH/MISSING/AUTHOR/ORCID", highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - // TODO + public UpdateInfo> generateUpdateInfo(final Pair highlightValue, + final Result source, final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_AUTHOR_ORCID, + highlightValue, source, target, + (p, pair) -> p.getCreators().add(pair.getLeft() + " - ORCID: " + pair.getRight()), + pair -> pair.getLeft() + "::" + pair.getRight()); } - - @Override - public String getHighlightValueAsString() { - return 
getHighlightValue(); - } - } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java index 9464130f3..9079ee24b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java @@ -5,28 +5,29 @@ import java.util.Arrays; import java.util.List; import eu.dnetlib.broker.objects.Instance; -import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingOpenAccess extends UpdateInfo { +public class EnrichMissingOpenAccess extends UpdateMatcher { + + public EnrichMissingOpenAccess() { + super(true); + } + + @Override + protected List> findUpdates(final Result source, final Result target) { - public static List findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); } - private EnrichMissingOpenAccess(final Instance highlightValue, final float trust) { - super("ENRICH/MISSING/OPENACCESS_VERSION", highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().getInstances().add(getHighlightValue()); - } - - @Override - public String getHighlightValueAsString() { - return getHighlightValue().getUrl(); + public UpdateInfo generateUpdateInfo(final Instance highlightValue, final Result source, + final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_OA_VERSION, + highlightValue, source, target, + (p, i) -> p.getInstances().add(i), + Instance::getUrl); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java index 293d4993f..0b4045a0e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java @@ -4,29 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.broker.objects.Pid; +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPid extends UpdateInfo { +public class EnrichMissingPid extends UpdateMatcher { - public static List findUpdates(final Result source, final Result target) { + public EnrichMissingPid() { + super(true); + } + + @Override + protected List> findUpdates(final Result source, final Result target) { // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); } - private EnrichMissingPid(final Pid highlightValue, final float trust) { - super("ENRICH/MISSING/PID", highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().getPids().add(getHighlightValue()); - } - - @Override - public String getHighlightValueAsString() { - return getHighlightValue().getType() + "::" + getHighlightValue().getValue(); + public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PID, + highlightValue, source, target, + (p, pid) -> p.getPids().add(pid), + pid -> pid.getType() + "::" + pid.getValue()); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java index a22c179a2..45b16801c 100644 
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java @@ -4,30 +4,30 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.broker.objects.Project; +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingProject extends UpdateInfo { +public class EnrichMissingProject extends UpdateMatcher { - public static List findUpdates(final Result source, final Result target) { + public EnrichMissingProject() { + super(true); + } + + @Override + protected List> findUpdates(final Result source, final Result target) { // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); } - private EnrichMissingProject(final Project highlightValue, final float trust) { - super("ENRICH/MISSING/PROJECT", highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().getProjects().add(getHighlightValue()); - } - - @Override - public String getHighlightValueAsString() { - return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() - + getHighlightValue().getCode(); + public UpdateInfo generateUpdateInfo(final Project highlightValue, final Result source, + final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PROJECT, + highlightValue, source, target, + (p, prj) -> p.getProjects().add(prj), + prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode()); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java index 869dca264..7fcd2a66f 
100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java @@ -4,28 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingPublicationDate extends UpdateInfo { +public class EnrichMissingPublicationDate extends UpdateMatcher { - public static List findUpdates(final Result source, final Result target) { + public EnrichMissingPublicationDate() { + super(false); + } + + @Override + protected List> findUpdates(final Result source, final Result target) { // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); } - private EnrichMissingPublicationDate(final String highlightValue, final float trust) { - super("ENRICH/MISSING/PUBLICATION_DATE", highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().setPublicationdate(getHighlightValue()); - } - - @Override - public String getHighlightValueAsString() { - return getHighlightValue(); + public UpdateInfo generateUpdateInfo(final String highlightValue, final Result source, + final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_PUBLICATION_DATE, + highlightValue, source, target, + (p, date) -> p.setPublicationdate(date), + s -> s); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java index a2ed5d043..4470bd9d9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java @@ -4,12 +4,19 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.OpenAireEventPayload; +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMissingSubject extends UpdateInfo { +public class EnrichMissingSubject extends UpdateMatcher> { - public static List findUpdates(final Result source, final Result target) { + public EnrichMissingSubject() { + super(true); + } + + @Override + protected List>> findUpdates(final Result source, final Result target) { // MESHEUROPMC // ARXIV // JEL @@ -19,18 +26,15 @@ public class EnrichMissingSubject extends UpdateInfo { return Arrays.asList(); } - private EnrichMissingSubject(final String subjectClassification, final String highlightValue, final float trust) { - super("ENRICH/MISSING/SUBJECT/" + subjectClassification, highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().getSubjects().add(getHighlightValue()); - } + public UpdateInfo> generateUpdateInfo(final Pair highlightValue, + final Result source, final Result target) { - @Override - public String getHighlightValueAsString() { - return getHighlightValue(); + return new UpdateInfo<>( + Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()), + highlightValue, source, target, + (p, pair) -> p.getSubjects().add(pair.getRight()), + pair -> pair.getLeft() + "::" + pair.getRight()); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java index 4f1e88d3d..bc37ce659 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java @@ -5,28 +5,29 @@ import java.util.Arrays; import java.util.List; import eu.dnetlib.broker.objects.Instance; -import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreOpenAccess extends UpdateInfo { +public class EnrichMoreOpenAccess extends UpdateMatcher { - public static List findUpdates(final Result source, final Result target) { + public EnrichMoreOpenAccess() { + super(true); + } + + @Override + protected List> findUpdates(final Result source, final Result target) { // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); } - private EnrichMoreOpenAccess(final Instance highlightValue, final float trust) { - super("ENRICH/MORE/OPENACCESS_VERSION", highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().getInstances().add(getHighlightValue()); - } - - @Override - public String getHighlightValueAsString() { - return getHighlightValue().getUrl(); + public UpdateInfo generateUpdateInfo(final Instance highlightValue, final Result source, + final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_OA_VERSION, + highlightValue, source, target, + (p, i) -> p.getInstances().add(i), + Instance::getUrl); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java index ecf2cf310..8cd67f553 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java @@ -4,29 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.broker.objects.Pid; +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMorePid extends UpdateInfo { +public class EnrichMorePid extends UpdateMatcher { - public static List findUpdates(final Result source, final Result target) { + public EnrichMorePid() { + super(true); + } + + @Override + protected List> findUpdates(final Result source, final Result target) { // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); } - private EnrichMorePid(final Pid highlightValue, final float trust) { - super("ENRICH/MORE/PID", highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().getPids().add(getHighlightValue()); - } - - @Override - public String getHighlightValueAsString() { - return getHighlightValue().getType() + "::" + getHighlightValue().getValue(); + public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_PID, + highlightValue, source, target, + (p, pid) -> p.getPids().add(pid), + pid -> pid.getType() + "::" + pid.getValue()); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java index f29b86292..9e0d8e693 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java @@ -4,12 +4,19 @@ package eu.dnetlib.dhp.broker.oa.util; 
import java.util.Arrays; import java.util.List; -import eu.dnetlib.broker.objects.OpenAireEventPayload; +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.schema.oaf.Result; -public class EnrichMoreSubject extends UpdateInfo { +public class EnrichMoreSubject extends UpdateMatcher> { - public static List findUpdates(final Result source, final Result target) { + public EnrichMoreSubject() { + super(true); + } + + @Override + protected List>> findUpdates(final Result source, final Result target) { // MESHEUROPMC // ARXIV // JEL @@ -19,18 +26,15 @@ public class EnrichMoreSubject extends UpdateInfo { return Arrays.asList(); } - private EnrichMoreSubject(final String subjectClassification, final String highlightValue, final float trust) { - super("ENRICH/MORE/SUBJECT/" + subjectClassification, highlightValue, trust); - } - @Override - public void compileHighlight(final OpenAireEventPayload payload) { - payload.getHighlight().getSubjects().add(getHighlightValue()); - } + public UpdateInfo> generateUpdateInfo(final Pair highlightValue, + final Result source, final Result target) { - @Override - public String getHighlightValueAsString() { - return getHighlightValue(); + return new UpdateInfo<>( + Topic.fromPath("ENRICH/MORE/SUBJECT/" + highlightValue.getLeft()), + highlightValue, source, target, + (p, pair) -> p.getSubjects().add(pair.getRight()), + pair -> pair.getLeft() + "::" + pair.getRight()); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index f7b6b69e9..1dfc14e5e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -1,36 +1,77 @@ package eu.dnetlib.dhp.broker.oa.util; +import 
java.util.function.BiConsumer; +import java.util.function.Function; + import eu.dnetlib.broker.objects.OpenAireEventPayload; +import eu.dnetlib.broker.objects.Publication; +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.oaf.Result; -public abstract class UpdateInfo { +public final class UpdateInfo { - private final String topic; + private final Topic topic; private final T highlightValue; + private final Result source; + + private final Result target; + + private final BiConsumer compileHighlight; + + private final Function highlightToString; + private final float trust; - protected UpdateInfo(final String topic, final T highlightValue, final float trust) { + protected UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target, + final BiConsumer compileHighlight, + final Function highlightToString) { this.topic = topic; this.highlightValue = highlightValue; - this.trust = trust; + this.source = source; + this.target = target; + this.compileHighlight = compileHighlight; + this.highlightToString = highlightToString; + this.trust = calculateTrust(source, target); } public T getHighlightValue() { return highlightValue; } + public Result getSource() { + return source; + } + + public Result getTarget() { + return target; + } + + private float calculateTrust(final Result source, final Result target) { + // TODO + return 0.9f; + } + + protected Topic getTopic() { + return topic; + } + + public String getTopicPath() { + return topic.getPath(); + } + public float getTrust() { return trust; } - public String getTopic() { - return topic; + public void compileHighlight(final OpenAireEventPayload payload) { + compileHighlight.accept(payload.getHighlight(), getHighlightValue()); } - abstract public void compileHighlight(OpenAireEventPayload payload); - - abstract public String getHighlightValueAsString(); + public String getHighlightValueAsString() { + return highlightToString.apply(getHighlightValue()); + } } diff 
--git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateMatcher.java new file mode 100644 index 000000000..3fd6d4027 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateMatcher.java @@ -0,0 +1,81 @@
+
+package eu.dnetlib.dhp.broker.oa.util;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.dhp.schema.oaf.Field;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Base class of the matchers that compare a record with other records and collect the updates they suggest.
+ *
+ * @param <T> type of the value highlighted by the produced updates
+ */
+public abstract class UpdateMatcher<T> {
+
+	/** When false, searchUpdatesForRecord returns at most one update: the most trusted one. */
+	private final boolean multipleUpdate;
+
+	public UpdateMatcher(final boolean multipleUpdate) {
+		this.multipleUpdate = multipleUpdate;
+	}
+
+	/**
+	 * Collects the updates that the other records suggest for the given record.
+	 * Updates are de-duplicated on the string form of their highlight value; among duplicates the one with the
+	 * highest trust is kept.
+	 *
+	 * @param res the record for which updates are searched
+	 * @param others the records to compare with; the instance {@code res} itself is skipped
+	 * @return every distinct update when multiple updates are allowed, otherwise at most the most trusted one
+	 */
+	public Collection<UpdateInfo<T>> searchUpdatesForRecord(final Result res, final Result... others) {
+
+		final Map<String, UpdateInfo<T>> infoMap = new HashMap<>();
+
+		for (final Result source : others) {
+			if (source != res) {
+				for (final UpdateInfo<T> info : findUpdates(source, res)) {
+					final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
+					// first occurrence of this highlight value, or a more trusted duplicate: (re)store it
+					if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {
+						infoMap.put(s, info);
+					}
+				}
+			}
+		}
+
+		final Collection<UpdateInfo<T>> values = infoMap.values();
+
+		if (values.isEmpty() || multipleUpdate) {
+			return values;
+		} else {
+			// only one update is allowed: pick the one with the highest trust
+			final UpdateInfo<T> v = values
+				.stream()
+				.max((o1, o2) -> Float.compare(o1.getTrust(), o2.getTrust()))
+				.get();
+			return Arrays.asList(v);
+		}
+	}
+
+	/** Returns the updates found by comparing {@code source} with {@code target}. */
+	protected abstract List<UpdateInfo<T>> findUpdates(Result source, Result target);
+
+	/** Builds the update carrying {@code highlightValue} for the given source/target pair. */
+	protected abstract UpdateInfo<T> generateUpdateInfo(final T highlightValue, final Result source,
+		final Result target);
+
+	/** Tells whether the list is null, empty, or has a blank value in its first field. */
+	protected static boolean isMissing(final List<Field<String>> list) {
+		return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue());
+	}
+
+}
From 5ecacad70a0539bd94e336ad604cc32ac8e0ce1a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 13 May 2020 17:01:11 +0200 Subject: [PATCH 15/31] fixed default resource typing in Oaf/Odf mapping --- .../raw/AbstractMdRecordToOafMapper.java | 168 +++++++++--------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index fd12716b4..be0b91022 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -64,7 +64,7 @@ public abstract class AbstractMdRecordToOafMapper { } protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( - "main title", "main title", 
"dnet:dataCite_title", "dnet:dataCite_title"); + "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -75,20 +75,20 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText( - xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); + .parseText( + xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); if (collectedFrom == null) { return null; } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) - ? collectedFrom - : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); + ? 
collectedFrom + : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); if (hostedBy == null) { return null; @@ -112,22 +112,21 @@ public abstract class AbstractMdRecordToOafMapper { } return keyValue( - createOpenaireId(10, dsId, true), - dsName); + createOpenaireId(10, dsId, true), + dsName); } protected List createOafs( - final Document doc, - final String type, - final KeyValue collectedFrom, - final KeyValue hostedBy, - final DataInfo info, - final long lastUpdateTimestamp) { + final Document doc, + final String type, + final KeyValue collectedFrom, + final KeyValue hostedBy, + final DataInfo info, + final long lastUpdateTimestamp) { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "": case "publication": final Publication p = new Publication(); populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); @@ -138,7 +137,7 @@ public abstract class AbstractMdRecordToOafMapper { case "dataset": final Dataset d = new Dataset(); populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); - d.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setDevice(prepareDatasetDevice(doc, info)); d.setSize(prepareDatasetSize(doc, info)); @@ -158,6 +157,7 @@ public abstract class AbstractMdRecordToOafMapper { s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); oafs.add(s); break; + case "": case "otherresearchproducts": default: final OtherResearchProduct o = new OtherResearchProduct(); @@ -179,10 +179,10 @@ public abstract class AbstractMdRecordToOafMapper { } private List addProjectRels( - final Document doc, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp) { + final Document doc, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp) { final List res = new ArrayList<>(); @@ -196,15 +196,15 @@ 
public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add( - getRelation( - docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add( + getRelation( + docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, + lastUpdateTimestamp)); res - .add( - getRelation( - projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, - lastUpdateTimestamp)); + .add( + getRelation( + projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, + lastUpdateTimestamp)); } } @@ -212,7 +212,7 @@ public abstract class AbstractMdRecordToOafMapper { } protected Relation getRelation(String source, String target, String relType, String subRelType, String relClass, - KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { + KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { final Relation rel = new Relation(); rel.setRelType(relType); rel.setSubRelType(subRelType); @@ -226,27 +226,27 @@ public abstract class AbstractMdRecordToOafMapper { } protected abstract List addOtherResultRels( - final Document doc, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp); + final Document doc, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp); private void populateResultFields( - final Result r, - final Document doc, - final KeyValue collectedFrom, - final KeyValue hostedBy, - final DataInfo info, - final long lastUpdateTimestamp) { + final Result r, + final Document doc, + final KeyValue collectedFrom, + final KeyValue hostedBy, + final DataInfo info, + final long lastUpdateTimestamp) { r.setDataInfo(info); r.setLastupdatetimestamp(lastUpdateTimestamp); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); 
r.setCollectedfrom(Arrays.asList(collectedFrom)); r - .setPid( - prepareListStructProps( - doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + .setPid( + prepareListStructProps( + doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES @@ -289,7 +289,7 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); protected abstract List prepareInstances( - Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); + Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); protected abstract List> prepareSources(Document doc, DataInfo info); @@ -314,13 +314,13 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareAuthors(Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductTools( - Document doc, DataInfo info); + Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductContactGroups( - Document doc, DataInfo info); + Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductContactPersons( - Document doc, DataInfo info); + Document doc, DataInfo info); protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info); @@ -329,7 +329,7 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareSoftwareLicenses(Document doc, DataInfo info); protected abstract List> prepareSoftwareDocumentationUrls( - Document doc, DataInfo info); + Document doc, DataInfo info); protected abstract List prepareDatasetGeoLocations(Document doc, DataInfo info); @@ -359,37 +359,37 @@ public abstract class AbstractMdRecordToOafMapper { final String edition = 
n.valueOf("@edition"); if (StringUtils.isNotBlank(name)) { return journal( - name, - issnPrinted, - issnOnline, - issnLinking, - ep, - iss, - sp, - vol, - edition, - null, - null, - info); + name, + issnPrinted, + issnOnline, + issnLinking, + ep, + iss, + sp, + vol, + edition, + null, + null, + info); } } return null; } protected Qualifier prepareQualifier( - final Node node, final String xpath, final String schemeId, final String schemeName) { + final Node node, final String xpath, final String schemeId, final String schemeName) { final String classId = node.valueOf(xpath); final String className = code2name.get(classId); return qualifier(classId, className, schemeId, schemeName); } protected List prepareListStructProps( - final Node node, - final String xpath, - final String xpathClassId, - final String schemeId, - final String schemeName, - final DataInfo info) { + final Node node, + final String xpath, + final String xpathClassId, + final String schemeId, + final String schemeName, + final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; @@ -401,7 +401,7 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { + final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; @@ -411,19 +411,19 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final DataInfo info) { + final Node node, final String xpath, final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add( - structuredProperty( - n.getText(), - n.valueOf("@classid"), - n.valueOf("@classname"), - 
n.valueOf("@schemeid"), - n.valueOf("@schemename"), - info)); + .add( + structuredProperty( + n.getText(), + n.valueOf("@classid"), + n.valueOf("@classname"), + n.valueOf("@schemeid"), + n.valueOf("@schemename"), + info)); } return res; } @@ -450,7 +450,7 @@ public abstract class AbstractMdRecordToOafMapper { if (n == null) { return dataInfo( - false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); @@ -464,12 +464,12 @@ public abstract class AbstractMdRecordToOafMapper { final String trust = n.valueOf("./oaf:trust"); return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - qualifier(paClassId, paClassName, paSchemeId, paSchemeName), - trust); + deletedbyinference, + inferenceprovenance, + inferred, + false, + qualifier(paClassId, paClassName, paSchemeId, paSchemeName), + trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { @@ -477,7 +477,7 @@ public abstract class AbstractMdRecordToOafMapper { } protected List> prepareListFields( - final Node node, final String xpath, final DataInfo info) { + final Node node, final String xpath, final DataInfo info) { return listFields(info, prepareListString(node, xpath)); } From ab37953332755ed53ebf95655547a4736d8f7395 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 14 May 2020 10:25:41 +0200 Subject: [PATCH 16/31] added global properties in wf definitions to avoid repeating name-node and job-tracker in the (many) distcp actions; reintroduced output directory removal at the beginning of each spark action --- .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 2 + .../SparkCountryPropagationJob.java | 17 +- .../PrepareResultOrcidAssociationStep1.java | 4 +- .../PrepareResultOrcidAssociationStep2.java | 4 +- .../SparkOrcidToResultFromSemRelJob.java | 7 +- .../PrepareProjectResultsAssociation.java | 2 + 
.../PrepareResultCommunitySet.java | 4 +- ...kResultToCommunityFromOrganizationJob.java | 7 +- .../PrepareResultInstRepoAssociation.java | 43 +++-- ...arkResultToOrganizationFromIstRepoJob.java | 7 +- .../dhp/bulktag/oozie_app/workflow.xml | 27 ++- .../countrypropagation/oozie_app/workflow.xml | 20 ++- .../oozie_app/workflow.xml | 7 +- .../projecttoresult/oozie_app/workflow.xml | 29 ++-- .../oozie_app/workflow.xml | 29 ++-- .../oozie_app/workflow.xml | 52 +++--- .../raw/AbstractMdRecordToOafMapper.java | 164 +++++++++--------- 17 files changed, 213 insertions(+), 212 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 75d85e2ba..1c65e8ade 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.bulktag; +import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Optional; @@ -84,6 +85,7 @@ public class SparkBulkTagJob { conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 9dc17701b..974b3a3b1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -69,13 +69,16 @@ public class SparkCountryPropagationJob { 
runWithSparkSession( conf, isSparkSessionManaged, - spark -> execPropagation( - spark, - sourcePath, - preparedInfoPath, - outputPath, - resultClazz, - saveGraph)); + spark -> { + removeOutputDir(spark, outputPath); + execPropagation( + spark, + sourcePath, + preparedInfoPath, + outputPath, + resultClazz, + saveGraph); + }); } private static void execPropagation( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 3e16b4b4b..400c8d8ef 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -74,9 +74,7 @@ public class PrepareResultOrcidAssociationStep1 { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } + removeOutputDir(spark, outputPath); prepareInfo( spark, inputRelationPath, inputResultPath, outputResultPath, resultClazz, allowedsemrel); }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index 65d8811bc..2cea32e58 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -50,9 +50,7 @@ public class PrepareResultOrcidAssociationStep2 { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } + removeOutputDir(spark, outputPath); 
mergeInfo(spark, inputPath, outputPath); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index ebb75a5a6..b34b29c48 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -70,11 +70,10 @@ public class SparkOrcidToResultFromSemRelJob { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } - if (saveGraph) + removeOutputDir(spark, outputPath); + if (saveGraph) { execPropagation(spark, possibleUpdates, inputPath, outputPath, resultClazz); + } }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index 05dcdc692..c27da4258 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -60,6 +60,8 @@ public class PrepareProjectResultsAssociation { conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, potentialUpdatePath); + removeOutputDir(spark, alreadyLinkedPath); prepareResultProjProjectResults( spark, inputPath, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index e2d4d5687..90eb54e5f 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -55,9 +55,7 @@ public class PrepareResultCommunitySet { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } + removeOutputDir(spark, outputPath); prepareInfo(spark, inputPath, outputPath, organizationMap); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 71275cc7f..66297e177 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -68,11 +68,10 @@ public class SparkResultToCommunityFromOrganizationJob { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } - if (saveGraph) + removeOutputDir(spark, outputPath); + if (saveGraph) { execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath); + } }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index f8fe1668f..5f549be53 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -58,30 +58,15 @@ public class PrepareResultInstRepoAssociation { isSparkSessionManaged, spark -> { readNeededResources(spark, inputPath); + + removeOutputDir(spark, datasourceOrganizationPath); prepareDatasourceOrganization(spark, datasourceOrganizationPath); + + removeOutputDir(spark, alreadyLinkedPath); prepareAlreadyLinkedAssociation(spark, alreadyLinkedPath); }); } - private static void prepareAlreadyLinkedAssociation( - SparkSession spark, String alreadyLinkedPath) { - String query = "Select source resultId, collect_set(target) organizationSet " - + "from relation " - + "where datainfo.deletedbyinference = false " - + "and relClass = '" - + RELATION_RESULT_ORGANIZATION_REL_CLASS - + "' " - + "group by source"; - - spark - .sql(query) - .as(Encoders.bean(ResultOrganizationSet.class)) - // TODO retry to stick with datasets - .toJavaRDD() - .map(r -> OBJECT_MAPPER.writeValueAsString(r)) - .saveAsTextFile(alreadyLinkedPath, GzipCodec.class); - } - private static void readNeededResources(SparkSession spark, String inputPath) { Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); datasource.createOrReplaceTempView("datasource"); @@ -119,4 +104,24 @@ public class PrepareResultInstRepoAssociation { .option("compression", "gzip") .json(datasourceOrganizationPath); } + + private static void prepareAlreadyLinkedAssociation( + SparkSession spark, String alreadyLinkedPath) { + String query = "Select source resultId, collect_set(target) organizationSet " + + "from relation " + + "where datainfo.deletedbyinference = false " + + "and relClass = '" + + RELATION_RESULT_ORGANIZATION_REL_CLASS + + "' " + + "group by source"; + + spark + .sql(query) + .as(Encoders.bean(ResultOrganizationSet.class)) + // TODO retry to stick with datasets + .toJavaRDD() + .map(r -> OBJECT_MAPPER.writeValueAsString(r)) + 
.saveAsTextFile(alreadyLinkedPath, GzipCodec.class); + } + } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 86634d43f..13577fa7c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -83,10 +83,8 @@ public class SparkResultToOrganizationFromIstRepoJob { conf, isSparkSessionManaged, spark -> { - if (isTest(parser)) { - removeOutputDir(spark, outputPath); - } - if (saveGraph) + removeOutputDir(spark, outputPath); + if (saveGraph) { execPropagation( spark, datasourceorganization, @@ -94,6 +92,7 @@ public class SparkResultToOrganizationFromIstRepoJob { inputPath, outputPath, resultClazz); + } }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml index 754aba4f2..f019f8413 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml @@ -18,6 +18,17 @@ + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -42,8 +53,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/relation ${nameNode}/${outputPath}/relation @@ -53,8 +62,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/organization ${nameNode}/${outputPath}/organization @@ -64,8 +71,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/project 
${nameNode}/${outputPath}/project @@ -75,8 +80,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/datasource ${nameNode}/${outputPath}/datasource @@ -95,8 +98,6 @@ - ${jobTracker} - ${nameNode} yarn-cluster cluster bulkTagging-publication @@ -124,8 +125,6 @@ - ${jobTracker} - ${nameNode} yarn-cluster cluster bulkTagging-dataset @@ -153,8 +152,6 @@ - ${jobTracker} - ${nameNode} yarn-cluster cluster bulkTagging-orp @@ -182,8 +179,6 @@ - ${jobTracker} - ${nameNode} yarn-cluster cluster bulkTagging-software diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml index fc877071d..85116e4cc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml @@ -19,6 +19,17 @@ + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -43,8 +54,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/relation ${nameNode}/${outputPath}/relation @@ -54,18 +63,15 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/organization ${nameNode}/${outputPath}/organization + - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/project ${nameNode}/${outputPath}/project @@ -75,8 +81,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/datasource ${nameNode}/${outputPath}/datasource diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml index e4429b710..5ddc5fedf 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -57,6 +57,7 @@ + ${jobTracker} @@ -81,7 +82,6 @@ - @@ -230,8 +230,8 @@ - + @@ -271,6 +271,7 @@ + yarn @@ -302,6 +303,7 @@ + yarn @@ -333,6 +335,7 @@ + yarn diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml index 24e1d3b7f..9e91c06fb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml @@ -14,6 +14,17 @@ + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -42,8 +53,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/relation ${nameNode}/${outputPath}/relation @@ -53,8 +62,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/publication ${nameNode}/${outputPath}/publication @@ -64,8 +71,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/dataset ${nameNode}/${outputPath}/dataset @@ -75,8 +80,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/otherresearchproduct ${nameNode}/${outputPath}/otherresearchproduct @@ -86,28 +89,24 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/software ${nameNode}/${outputPath}/software + - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/organization ${nameNode}/${outputPath}/organization + - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/project ${nameNode}/${outputPath}/project @@ -117,8 +116,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/datasource ${nameNode}/${outputPath}/datasource diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml index d481cad05..6a329fdc4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -14,6 +14,17 @@ + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -38,8 +49,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/relation ${nameNode}/${outputPath}/relation @@ -49,8 +58,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/organization ${nameNode}/${outputPath}/organization @@ -60,8 +67,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/project ${nameNode}/${outputPath}/project @@ -71,8 +76,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/datasource ${nameNode}/${outputPath}/datasource @@ -101,8 +104,8 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/relation - --hive_metastore_uris${hive_metastore_uris} --outputPath${workingDir}/preparedInfo/resultCommunityList + --hive_metastore_uris${hive_metastore_uris} --organizationtoresultcommunitymap${organizationtoresultcommunitymap} @@ -136,9 +139,9 @@ --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList --sourcePath${sourcePath}/publication + --outputPath${outputPath}/publication --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication --saveGraph${saveGraph} @@ -165,9 +168,9 @@ --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList --sourcePath${sourcePath}/dataset + --outputPath${outputPath}/dataset --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset --saveGraph${saveGraph} 
@@ -194,9 +197,9 @@ --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList --sourcePath${sourcePath}/otherresearchproduct + --outputPath${outputPath}/otherresearchproduct --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct --saveGraph${saveGraph} @@ -223,9 +226,9 @@ --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList --sourcePath${sourcePath}/software + --outputPath${outputPath}/software --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software --saveGraph${saveGraph} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index a1b7f4ad7..e0563abae 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -10,6 +10,17 @@ + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -38,8 +49,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/relation ${nameNode}/${outputPath}/relation @@ -49,8 +58,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/publication ${nameNode}/${outputPath}/publication @@ -60,8 +67,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/dataset ${nameNode}/${outputPath}/dataset @@ -71,8 +76,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/otherresearchproduct ${nameNode}/${outputPath}/otherresearchproduct @@ -82,8 +85,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/software ${nameNode}/${outputPath}/software @@ -93,8 +94,6 @@ - ${jobTracker} - 
${nameNode} ${nameNode}/${sourcePath}/organization ${nameNode}/${outputPath}/organization @@ -104,8 +103,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/project ${nameNode}/${outputPath}/project @@ -115,8 +112,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/datasource ${nameNode}/${outputPath}/datasource @@ -125,6 +120,7 @@ + yarn @@ -176,12 +172,12 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/publication - --hive_metastore_uris${hive_metastore_uris} - --saveGraph${saveGraph} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/relation --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --saveGraph${saveGraph} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication @@ -206,12 +202,12 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/dataset - --hive_metastore_uris${hive_metastore_uris} - --saveGraph${saveGraph} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/relation --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --saveGraph${saveGraph} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset @@ -236,12 +232,12 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} - --saveGraph${saveGraph} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/relation --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --saveGraph${saveGraph} + 
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct @@ -266,12 +262,12 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/software - --hive_metastore_uris${hive_metastore_uris} - --saveGraph${saveGraph} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/relation --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --saveGraph${saveGraph} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index be0b91022..b9c4e6c80 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -64,7 +64,7 @@ public abstract class AbstractMdRecordToOafMapper { } protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( - "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final Map code2name) { this.code2name = code2name; @@ -75,20 +75,20 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText( - xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); + .parseText( + xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); final String type = doc.valueOf("//dr:CobjCategory/@type"); final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", 
"//oaf:collectedFrom/@name"); + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); if (collectedFrom == null) { return null; } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) - ? collectedFrom - : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); + ? collectedFrom + : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); if (hostedBy == null) { return null; @@ -112,17 +112,17 @@ public abstract class AbstractMdRecordToOafMapper { } return keyValue( - createOpenaireId(10, dsId, true), - dsName); + createOpenaireId(10, dsId, true), + dsName); } protected List createOafs( - final Document doc, - final String type, - final KeyValue collectedFrom, - final KeyValue hostedBy, - final DataInfo info, - final long lastUpdateTimestamp) { + final Document doc, + final String type, + final KeyValue collectedFrom, + final KeyValue hostedBy, + final DataInfo info, + final long lastUpdateTimestamp) { final List oafs = new ArrayList<>(); @@ -179,10 +179,10 @@ public abstract class AbstractMdRecordToOafMapper { } private List addProjectRels( - final Document doc, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp) { + final Document doc, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp) { final List res = new ArrayList<>(); @@ -196,15 +196,15 @@ public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add( - getRelation( - docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add( + getRelation( + docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, + lastUpdateTimestamp)); res - .add( - getRelation( - projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, - lastUpdateTimestamp)); + .add( + getRelation( + projectId, docId, RESULT_PROJECT, OUTCOME, 
PRODUCES, collectedFrom, info, + lastUpdateTimestamp)); } } @@ -212,7 +212,7 @@ public abstract class AbstractMdRecordToOafMapper { } protected Relation getRelation(String source, String target, String relType, String subRelType, String relClass, - KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { + KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { final Relation rel = new Relation(); rel.setRelType(relType); rel.setSubRelType(subRelType); @@ -226,27 +226,27 @@ public abstract class AbstractMdRecordToOafMapper { } protected abstract List addOtherResultRels( - final Document doc, - final KeyValue collectedFrom, - final DataInfo info, - final long lastUpdateTimestamp); + final Document doc, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp); private void populateResultFields( - final Result r, - final Document doc, - final KeyValue collectedFrom, - final KeyValue hostedBy, - final DataInfo info, - final long lastUpdateTimestamp) { + final Result r, + final Document doc, + final KeyValue collectedFrom, + final KeyValue hostedBy, + final DataInfo info, + final long lastUpdateTimestamp) { r.setDataInfo(info); r.setLastupdatetimestamp(lastUpdateTimestamp); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); r.setCollectedfrom(Arrays.asList(collectedFrom)); r - .setPid( - prepareListStructProps( - doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); + .setPid( + prepareListStructProps( + doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES @@ -289,7 +289,7 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract Qualifier 
prepareResourceType(Document doc, DataInfo info); protected abstract List prepareInstances( - Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); + Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); protected abstract List> prepareSources(Document doc, DataInfo info); @@ -314,13 +314,13 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareAuthors(Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductTools( - Document doc, DataInfo info); + Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductContactGroups( - Document doc, DataInfo info); + Document doc, DataInfo info); protected abstract List> prepareOtherResearchProductContactPersons( - Document doc, DataInfo info); + Document doc, DataInfo info); protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info); @@ -329,7 +329,7 @@ public abstract class AbstractMdRecordToOafMapper { protected abstract List prepareSoftwareLicenses(Document doc, DataInfo info); protected abstract List> prepareSoftwareDocumentationUrls( - Document doc, DataInfo info); + Document doc, DataInfo info); protected abstract List prepareDatasetGeoLocations(Document doc, DataInfo info); @@ -359,37 +359,37 @@ public abstract class AbstractMdRecordToOafMapper { final String edition = n.valueOf("@edition"); if (StringUtils.isNotBlank(name)) { return journal( - name, - issnPrinted, - issnOnline, - issnLinking, - ep, - iss, - sp, - vol, - edition, - null, - null, - info); + name, + issnPrinted, + issnOnline, + issnLinking, + ep, + iss, + sp, + vol, + edition, + null, + null, + info); } } return null; } protected Qualifier prepareQualifier( - final Node node, final String xpath, final String schemeId, final String schemeName) { + final Node node, final String xpath, final String schemeId, final String schemeName) { final String classId = node.valueOf(xpath); final String className = 
code2name.get(classId); return qualifier(classId, className, schemeId, schemeName); } protected List prepareListStructProps( - final Node node, - final String xpath, - final String xpathClassId, - final String schemeId, - final String schemeName, - final DataInfo info) { + final Node node, + final String xpath, + final String xpathClassId, + final String schemeId, + final String schemeName, + final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; @@ -401,7 +401,7 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { + final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; @@ -411,19 +411,19 @@ public abstract class AbstractMdRecordToOafMapper { } protected List prepareListStructProps( - final Node node, final String xpath, final DataInfo info) { + final Node node, final String xpath, final DataInfo info) { final List res = new ArrayList<>(); for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add( - structuredProperty( - n.getText(), - n.valueOf("@classid"), - n.valueOf("@classname"), - n.valueOf("@schemeid"), - n.valueOf("@schemename"), - info)); + .add( + structuredProperty( + n.getText(), + n.valueOf("@classid"), + n.valueOf("@classname"), + n.valueOf("@schemeid"), + n.valueOf("@schemename"), + info)); } return res; } @@ -450,7 +450,7 @@ public abstract class AbstractMdRecordToOafMapper { if (n == null) { return dataInfo( - false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); @@ -464,12 +464,12 @@ public abstract class 
AbstractMdRecordToOafMapper { final String trust = n.valueOf("./oaf:trust"); return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - qualifier(paClassId, paClassName, paSchemeId, paSchemeName), - trust); + deletedbyinference, + inferenceprovenance, + inferred, + false, + qualifier(paClassId, paClassName, paSchemeId, paSchemeName), + trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { @@ -477,7 +477,7 @@ public abstract class AbstractMdRecordToOafMapper { } protected List> prepareListFields( - final Node node, final String xpath, final DataInfo info) { + final Node node, final String xpath, final DataInfo info) { return listFields(info, prepareListString(node, xpath)); } From 8828458acfd38e47b03c1a88335b4b5bc69c9eab Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 14 May 2020 10:34:12 +0200 Subject: [PATCH 17/31] minor changes --- .../PrepareResultOrcidAssociationStep1.java | 33 ++++++++++--------- .../SparkOrcidToResultFromSemRelJob.java | 22 +++++++------ 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 3e16b4b4b..7cd057cf3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -97,22 +97,23 @@ public class PrepareResultOrcidAssociationStep1 { Dataset result = readPath(spark, inputResultPath, resultClazz); result.createOrReplaceTempView("result"); - String query = " select target resultId, author authorList" - + " from (select id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 
'orcid', orcid)) author " - + " from ( " - + " select id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid " - + " from result " - + " lateral view explode (author) a as MyT " - + " lateral view explode (MyT.pid) p as MyP " - + " where MyP.qualifier.classid = 'ORCID') tmp " - + " group by id) r_t " - + " join (" - + " select source, target " - + " from relation " - + " where datainfo.deletedbyinference = false " - + getConstraintList(" relclass = '", allowedsemrel) - + ") rel_rel " - + " on source = id"; + String query = + "SELECT target resultId, author authorList" + + " FROM (SELECT id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author " + + " FROM ( " + + " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid " + + " FROM result " + + " LATERAL VIEW EXPLODE (author) a AS MyT " + + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " + + " WHERE MyP.qualifier.classid = 'ORCID') tmp " + + " GROUP BY id) r_t " + + " JOIN (" + + " SELECT source, target " + + " FROM relation " + + " WHERE datainfo.deletedbyinference = false " + + getConstraintList(" relclass = '", allowedsemrel) + + " ) rel_rel " + + " ON source = id"; spark .sql(query) .as(Encoders.bean(ResultOrcidList.class)) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index ebb75a5a6..b93b66d9f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -132,16 +132,16 @@ public class SparkOrcidToResultFromSemRelJob { private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { boolean toaddpid = false; - if 
(StringUtils.isNoneEmpty(autoritative_author.getSurname())) { - if (StringUtils.isNoneEmpty(author.getSurname())) { + if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { + if (StringUtils.isNotEmpty(author.getSurname())) { if (autoritative_author .getSurname() .trim() .equalsIgnoreCase(author.getSurname().trim())) { // have the same surname. Check the name - if (StringUtils.isNoneEmpty(autoritative_author.getName())) { - if (StringUtils.isNoneEmpty(author.getName())) { + if (StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { if (autoritative_author .getName() .trim() @@ -150,12 +150,14 @@ public class SparkOrcidToResultFromSemRelJob { } // they could be differently written (i.e. only the initials of the name // in one of the two - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { - toaddpid = true; + else { + if (autoritative_author + .getName() + .trim() + .substring(0, 0) + .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { + toaddpid = true; + } } } } From f044d093156c3c29cf00a4a9b498459885ebcdd0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 14 May 2020 15:07:24 +0200 Subject: [PATCH 18/31] revised mapping: more accurate mapping for name/surname from datacite format; improved mapping of null values --- .../migration/ProtoConverter.java | 27 ++++++++++++----- .../raw/MigrateDbEntitiesApplication.java | 20 +++++-------- .../dhp/oa/graph/raw/OdfToOafMapper.java | 30 +++++++++++++++---- .../dhp/oa/graph/raw/common/PacePerson.java | 1 - 4 files changed, 51 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java index 90d573ac0..e55c0eb7b 100644 --- 
a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java @@ -523,7 +523,9 @@ public class ProtoConverter implements Serializable { } private static Context mapContext(ResultProtos.Result.Context context) { - + if (context == null || StringUtils.isBlank(context.getId())) { + return null; + } final Context entity = new Context(); entity.setId(context.getId()); entity @@ -537,6 +539,10 @@ public class ProtoConverter implements Serializable { } public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) { + if (kv == null || StringUtils.isBlank(kv.getKey()) && StringUtils.isBlank(kv.getValue())) { + return null; + } + final KeyValue keyValue = new KeyValue(); keyValue.setKey(kv.getKey()); keyValue.setValue(kv.getValue()); @@ -575,6 +581,10 @@ public class ProtoConverter implements Serializable { } public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) { + if (sp == null || StringUtils.isBlank(sp.getValue())) { /* '||' short-circuits: a null sp must never reach sp.getValue() */ + return null; + } + final StructuredProperty structuredProperty = new StructuredProperty(); structuredProperty.setValue(sp.getValue()); structuredProperty.setQualifier(mapQualifier(sp.getQualifier())); @@ -611,6 +621,10 @@ public class ProtoConverter implements Serializable { } public static Field mapStringField(FieldTypeProtos.StringField s) { + if (s == null || StringUtils.isBlank(s.getValue())) { + return null; + } + final Field stringField = new Field<>(); stringField.setValue(s.getValue()); stringField.setDataInfo(mapDataInfo(s.getDataInfo())); @@ -618,19 +632,16 @@ public class ProtoConverter implements Serializable { } public static Field mapBoolField(FieldTypeProtos.BoolField b) { + if (b == null) { + return null; + } + final Field booleanField = new Field<>(); booleanField.setValue(b.getValue()); booleanField.setDataInfo(mapDataInfo(b.getDataInfo())); return
booleanField; } - public static Field mapIntField(FieldTypeProtos.IntField b) { - final Field entity = new Field<>(); - entity.setValue(b.getValue()); - entity.setDataInfo(mapDataInfo(b.getDataInfo())); - return entity; - } - public static Journal mapJournal(FieldTypeProtos.Journal j) { final Journal journal = new Journal(); journal.setConferencedate(j.getConferencedate()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index e5e348642..ebe2b703b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -50,8 +50,7 @@ import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class MigrateDbEntitiesApplication extends AbstractMigrationApplication - implements Closeable { +public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable { private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); @@ -128,9 +127,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication } public List processDatasource(final ResultSet rs) { - try { - final DataInfo info = prepareDataInfo(rs); final Datasource ds = new Datasource(); @@ -194,7 +191,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication public List processProject(final ResultSet rs) { try { - final DataInfo info = prepareDataInfo(rs); final Project p = new Project(); @@ -249,9 +245,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication } public List processOrganization(final ResultSet rs) { - try { - final DataInfo info = 
prepareDataInfo(rs); final Organization o = new Organization(); @@ -370,14 +364,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication final DataInfo info = dataInfo( false, null, false, false, - qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9"); final List collectedFrom = listKeyValues( createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); try { - if (rs.getString(SOURCE_TYPE).equals("context")) { final Result r; @@ -461,9 +453,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication final Boolean inferred = rs.getBoolean("inferred"); final String trust = rs.getString("trust"); return dataInfo( - - deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust); - + deletedbyinference, + inferenceprovenance, + inferred, + false, + ENTITYREGISTRY_PROVENANCE_ACTION, + trust); } private Qualifier prepareQualifierSplitting(final String s) { @@ -535,4 +530,5 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication super.close(); dbClient.close(); } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 04984d008..5baac12fd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -12,6 +12,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; +import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; @@ -44,9 +45,24 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : 
doc.selectNodes("//datacite:creator")) { final Node n = (Node) o; final Author author = new Author(); - author.setFullname(n.valueOf("./datacite:creatorName")); - author.setName(n.valueOf("./datacite:givenName")); - author.setSurname(n.valueOf("./datacite:familyName")); + final String fullname = n.valueOf("./datacite:creatorName"); + author.setFullname(fullname); + + PacePerson pp = new PacePerson(fullname, false); + final String name = n.valueOf("./datacite:givenName"); + if (StringUtils.isBlank(name) && pp.isAccurate()) { + author.setName(pp.getNormalisedFirstName()); + } else { + author.setName(name); + } + + final String surname = n.valueOf("./datacite:familyName"); + if (StringUtils.isBlank(surname) && pp.isAccurate()) { + author.setSurname(pp.getNormalisedSurname()); + } else { + author.setSurname(surname); + } + author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info)); author.setPid(preparePids(doc, info)); author.setRank(pos++); @@ -77,8 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final KeyValue hostedby) { final Instance instance = new Instance(); - final Set url = new HashSet<>(); - instance.setUrl(new ArrayList<>()); instance .setInstancetype( prepareQualifier( @@ -97,6 +111,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .setProcessingchargecurrency( field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + final Set url = new HashSet<>(); for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { url.add(((Node) o).getText().trim()); } @@ -109,7 +124,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) { url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } - instance.getUrl().addAll(url); + if (!url.isEmpty()) { + instance.setUrl(new ArrayList<>()); + instance.getUrl().addAll(url); + } return
Arrays.asList(instance); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java index d1c615dcd..6e474f2f3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.oa.graph.raw.common; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.text.Normalizer; import java.util.HashSet; From 42085e8d99220ccdf1f4c9cc38d26db24b9544a2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 14 May 2020 18:22:28 +0200 Subject: [PATCH 19/31] added some constants --- .../java/eu/dnetlib/dhp/schema/common/ModelConstants.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index accc06d12..e32dd10fa 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -13,6 +13,7 @@ public class ModelConstants { public static final String DNET_DATA_CITE_DATE = "dnet:dataCite_date"; public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource"; public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions"; + public static final String DNET_COUNTRY_TYPE = "dnet:countries"; public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository"; public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry"; @@ -49,6 +50,13 @@ public class ModelConstants { public static final String HAS_PARTICIPANT = "hasParticipant"; public static final String 
IS_PARTICIPANT = "isParticipant"; + public static final String RESULT_ORGANIZATION = "resultOrganization"; + public static final String AFFILIATION = "affiliation"; + public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf"; + public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution"; + + public static final String MERGES = "merges"; + public static final String UNKNOWN = "UNKNOWN"; public static final String NOT_AVAILABLE = "not available"; From d05630d9795f1a26a9ea9ce33d2337c9156217ff Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 14 May 2020 18:22:50 +0200 Subject: [PATCH 20/31] removed the constants added in ModelConstants --- .../eu/dnetlib/dhp/PropagationConstant.java | 30 ++++--------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 8d2fede82..13ed46508 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -24,10 +26,6 @@ public class PropagationConstant { public static final String TRUE = "true"; - public static final String DNET_COUNTRY_SCHEMA = "dnet:countries"; - public static final String DNET_SCHEMA_NAME = "dnet:provenanceActions"; - public static final String DNET_SCHEMA_ID = "dnet:provenanceActions"; - public static final String PROPAGATION_COUNTRY_INSTREPO_CLASS_ID = "country:instrepos"; public static final String PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME = 
"Propagation of country to result collected from datasources of type institutional repositories"; @@ -46,22 +44,6 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - public static final String RELATION_DATASOURCE_ORGANIZATION_REL_CLASS = "isProvidedBy"; - - public static final String RELATION_RESULTORGANIZATION_REL_TYPE = "resultOrganization"; - public static final String RELATION_RESULTORGANIZATION_SUBREL_TYPE = "affiliation"; - public static final String RELATION_ORGANIZATION_RESULT_REL_CLASS = "isAuthorInstitutionOf"; - public static final String RELATION_RESULT_ORGANIZATION_REL_CLASS = "hasAuthorInstitution"; - - public static final String RELATION_RESULTRESULT_REL_TYPE = "resultResult"; - - public static final String RELATION_RESULTPROJECT_REL_TYPE = "resultProject"; - public static final String RELATION_RESULTPROJECT_SUBREL_TYPE = "outcome"; - public static final String RELATION_RESULT_PROJECT_REL_CLASS = "isProducedBy"; - public static final String RELATION_PROJECT_RESULT_REL_CLASS = "produces"; - - public static final String RELATION_REPRESENTATIVERESULT_RESULT_CLASS = "merges"; - public static final String PROPAGATION_AUTHOR_PID = "ORCID"; public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -76,8 +58,8 @@ public class PropagationConstant { Country nc = new Country(); nc.setClassid(classid); nc.setClassname(classname); - nc.setSchemename(DNET_COUNTRY_SCHEMA); - nc.setSchemeid(DNET_COUNTRY_SCHEMA); + nc.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); + nc.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); nc .setDataInfo( getDataInfo( @@ -102,8 +84,8 @@ public class PropagationConstant { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); - 
pa.setSchemeid(DNET_SCHEMA_ID); - pa.setSchemename(DNET_SCHEMA_NAME); + pa.setSchemeid(ModelConstants.DNET_PROVENANCE_ACTIONS); + pa.setSchemename(ModelConstants.DNET_PROVENANCE_ACTIONS); return pa; } From f25db01664eb56d2250d00e95822b4aaacaf52bf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 14 May 2020 18:29:24 +0200 Subject: [PATCH 21/31] changed in the constant from propagationconstants to modelconstants --- .../PrepareDatasourceCountryAssociation.java | 3 ++- .../PrepareProjectResultsAssociation.java | 3 ++- .../SparkResultToProjectThroughSemRelJob.java | 13 +++++++------ .../PrepareResultCommunitySet.java | 5 +++-- .../PrepareResultInstRepoAssociation.java | 5 +++-- .../SparkResultToOrganizationFromIstRepoJob.java | 13 +++++++------ 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index e91a1e48a..1d01d35e5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -100,7 +101,7 @@ public class PrepareDatasourceCountryAssociation { + "JOIN ( SELECT source, target " + " FROM relation " + " WHERE relclass = '" - + RELATION_DATASOURCE_ORGANIZATION_REL_CLASS + + ModelConstants.IS_PROVIDED_BY + "' " + " AND datainfo.deletedbyinference = false ) rel " + "ON d.id = rel.source " diff --git
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index c27da4258..920ef1f06 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -8,6 +8,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -85,7 +86,7 @@ public class PrepareProjectResultsAssociation { + " FROM relation " + " WHERE datainfo.deletedbyinference = false " + " AND relClass = '" - + RELATION_RESULT_PROJECT_REL_CLASS + + ModelConstants.IS_PRODUCED_BY + "'"; Dataset resproj_relation = spark.sql(resproj_relation_query); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 36694b3dd..44a439ab7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -9,6 +9,7 @@ import java.util.Iterator; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -122,9 +123,9 @@ public class SparkResultToProjectThroughSemRelJob { 
getRelation( resId, projectId, - RELATION_RESULT_PROJECT_REL_CLASS, - RELATION_RESULTPROJECT_REL_TYPE, - RELATION_RESULTPROJECT_SUBREL_TYPE, + ModelConstants.IS_PRODUCED_BY, + ModelConstants.RESULT_PROJECT , + ModelConstants.OUTCOME, PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); @@ -133,9 +134,9 @@ public class SparkResultToProjectThroughSemRelJob { getRelation( projectId, resId, - RELATION_PROJECT_RESULT_REL_CLASS, - RELATION_RESULTPROJECT_REL_TYPE, - RELATION_RESULTPROJECT_SUBREL_TYPE, + ModelConstants.PRODUCES, + ModelConstants.RESULT_PROJECT, + ModelConstants.OUTCOME, PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 90eb54e5f..fc9f395d1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -74,13 +75,13 @@ public class PrepareResultCommunitySet { + " FROM relation " + " WHERE datainfo.deletedbyinference = false " + " AND relClass = '" - + RELATION_RESULT_ORGANIZATION_REL_CLASS + + ModelConstants.HAS_AUTHOR_INSTITUTION + "') result_organization " + "LEFT JOIN (SELECT source, collect_set(target) org_set " + " FROM 
relation " + " WHERE datainfo.deletedbyinference = false " + " AND relClass = '" - + RELATION_REPRESENTATIVERESULT_RESULT_CLASS + + ModelConstants.MERGES + "' " + " GROUP BY source) organization_organization " + "ON result_organization.target = organization_organization.source "; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 5f549be53..0a83e4195 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -91,7 +92,7 @@ public class PrepareResultInstRepoAssociation { + "JOIN ( SELECT source, target " + "FROM relation " + "WHERE relclass = '" - + RELATION_DATASOURCE_ORGANIZATION_REL_CLASS + + ModelConstants.IS_PROVIDED_BY + "' " + "AND datainfo.deletedbyinference = false ) rel " + "ON d.id = rel.source "; @@ -111,7 +112,7 @@ public class PrepareResultInstRepoAssociation { + "from relation " + "where datainfo.deletedbyinference = false " + "and relClass = '" - + RELATION_RESULT_ORGANIZATION_REL_CLASS + + ModelConstants.HAS_AUTHOR_INSTITUTION + "' " + "group by source"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 13577fa7c..fc9a46d72 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; @@ -150,9 +151,9 @@ public class SparkResultToOrganizationFromIstRepoJob { getRelation( orgId, resultId, - RELATION_ORGANIZATION_RESULT_REL_CLASS, - RELATION_RESULTORGANIZATION_REL_TYPE, - RELATION_RESULTORGANIZATION_SUBREL_TYPE, + ModelConstants.IS_AUTHOR_INSTITUTION_OF, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); @@ -161,9 +162,9 @@ public class SparkResultToOrganizationFromIstRepoJob { getRelation( resultId, orgId, - RELATION_RESULT_ORGANIZATION_REL_CLASS, - RELATION_RESULTORGANIZATION_REL_TYPE, - RELATION_RESULTORGANIZATION_SUBREL_TYPE, + ModelConstants.HAS_AUTHOR_INSTITUTION, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); From eb64335a547c1a8b9e708f50b6f33b362fa1e54e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 09:05:26 +0200 Subject: [PATCH 22/31] parallel implementation for graph Hive importer --- .../graph/hive/GraphHiveTableImporterJob.java | 79 
+++++++ .../hive/oozie_app/lib/scripts/reset_db.sql | 2 + .../dhp/oa/graph/hive/oozie_app/workflow.xml | 200 +++++++++++++++++- .../oa/graph/hive_db_importer_parameters.json | 26 +++ .../graph/hive_table_importer_parameters.json | 32 +++ 5 files changed, 331 insertions(+), 8 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/reset_db.sql create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_db_importer_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_table_importer_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java new file mode 100644 index 000000000..f88f7457f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hive/GraphHiveTableImporterJob.java @@ -0,0 +1,79 @@ + +package eu.dnetlib.dhp.oa.graph.hive; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.schema.common.ModelSupport.tableIdentifier; + +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Oaf; + +public class GraphHiveTableImporterJob { + + private static final Logger log = 
LoggerFactory.getLogger(GraphHiveTableImporterJob.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GraphHiveTableImporterJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hive_table_importer_parameters.json"))); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); + + String hiveDbName = parser.get("hiveDbName"); + log.info("hiveDbName: {}", hiveDbName); + + final String className = parser.get("className"); + log.info("className: {}", className); + + Class clazz = (Class) Class.forName(className); + + String hiveMetastoreUris = parser.get("hiveMetastoreUris"); + log.info("hiveMetastoreUris: {}", hiveMetastoreUris); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", hiveMetastoreUris); + + runWithSparkHiveSession( + conf, isSparkSessionManaged, spark -> loadGraphTable(spark, inputPath, hiveDbName, clazz)); + } + + // reads JSON lines from inputPath and overwrites the Hive table for clazz; NOTE(review): declared private though originally commented 'protected for testing', confirm visibility + private static void loadGraphTable(SparkSession spark, String inputPath, String hiveDbName, + Class clazz) { + + spark + .read() + .textFile(inputPath) + .map((MapFunction) s -> OBJECT_MAPPER.readValue(s, clazz), Encoders.bean(clazz)) + .write() + .mode(SaveMode.Overwrite) + .saveAsTable(tableIdentifier(hiveDbName, clazz)); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/reset_db.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/reset_db.sql new file mode 100644 index 000000000..484afde80 --- /dev/null +++
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/reset_db.sql @@ -0,0 +1,2 @@ +DROP DATABASE IF EXISTS ${hiveDbName} CASCADE; +CREATE DATABASE ${hiveDbName}; \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml index e837ac6b3..2bcbbba5e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml @@ -72,18 +72,44 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + + + + hive.metastore.uris + ${hiveMetastoreUris} + + + ${hiveJdbcUrl}/${hiveDbName} + + hiveDbName=${hiveDbName} + + + + + + + + + + + + + + + + yarn cluster - MapGraphAsHiveDB - eu.dnetlib.dhp.oa.graph.hive.GraphHiveImporterJob + Import table publication + eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -95,18 +121,175 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --inputPath${inputPath} + --inputPath${inputPath}/publication --hiveDbName${hiveDbName} + --classNameeu.dnetlib.dhp.schema.oaf.Publication --hiveMetastoreUris${hiveMetastoreUris} - + + + + yarn + cluster + Import table dataset + eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + 
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --inputPath${inputPath}/dataset + --hiveDbName${hiveDbName} + --classNameeu.dnetlib.dhp.schema.oaf.Dataset + --hiveMetastoreUris${hiveMetastoreUris} + + + + + + + + yarn + cluster + Import table otherresearchproduct + eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --inputPath${inputPath}/otherresearchproduct + --hiveDbName${hiveDbName} + --classNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --hiveMetastoreUris${hiveMetastoreUris} + + + + + + + + yarn + cluster + Import table software + eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --inputPath${inputPath}/software + --hiveDbName${hiveDbName} + --classNameeu.dnetlib.dhp.schema.oaf.Software + --hiveMetastoreUris${hiveMetastoreUris} + + + + + + + + yarn + cluster + Import table datasource + eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob + dhp-graph-mapper-${projectVersion}.jar + + 
--executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --inputPath${inputPath}/datasource + --hiveDbName${hiveDbName} + --classNameeu.dnetlib.dhp.schema.oaf.Datasource + --hiveMetastoreUris${hiveMetastoreUris} + + + + + + + + yarn + cluster + Import table organization + eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --inputPath${inputPath}/organization + --hiveDbName${hiveDbName} + --classNameeu.dnetlib.dhp.schema.oaf.Organization + --hiveMetastoreUris${hiveMetastoreUris} + + + + + + + + yarn + cluster + Import table project + eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + 
--inputPath${inputPath}/project + --hiveDbName${hiveDbName} + --classNameeu.dnetlib.dhp.schema.oaf.Project + --hiveMetastoreUris${hiveMetastoreUris} + + + + + + + - ${jobTracker} - ${nameNode} hive.metastore.uris @@ -122,4 +305,5 @@ + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_db_importer_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_db_importer_parameters.json new file mode 100644 index 000000000..d6c13773a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_db_importer_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "in", + "paramLongName": "inputPath", + "paramDescription": "the path to the graph data dump to read", + "paramRequired": true + }, + { + "paramName": "hmu", + "paramLongName": "hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "db", + "paramLongName": "hiveDbName", + "paramDescription": "the target hive database name", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_table_importer_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_table_importer_parameters.json new file mode 100644 index 000000000..5b5b0743c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive_table_importer_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "in", + "paramLongName": 
"inputPath", + "paramDescription": "the path to the graph data dump to read", + "paramRequired": true + }, + { + "paramName": "hmu", + "paramLongName": "hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "db", + "paramLongName": "hiveDbName", + "paramDescription": "the target hive database name", + "paramRequired": true + }, + { + "paramName": "tn", + "paramLongName": "className", + "paramDescription": "the class modelling the target table", + "paramRequired": true + } +] \ No newline at end of file From fd62359538d397cfd9e73a3602e38638a5a0ff1d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 09:28:15 +0200 Subject: [PATCH 23/31] cleanup --- .../graph/migrate_actionsets_parameters.json | 10 ---------- .../eu/dnetlib/dhp/oa/graph/scriptFile.scala | 0 .../transform_actionsets_parameters.json | 20 ------------------- 3 files changed, 30 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_actionsets_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/scriptFile.scala delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/transform_actionsets_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_actionsets_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_actionsets_parameters.json deleted file mode 100644 index c4910ec61..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_actionsets_parameters.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - {"paramName":"is", "paramLongName":"isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true}, - {"paramName":"sn", "paramLongName":"sourceNameNode", "paramDescription": "nameNode of the source cluster", "paramRequired": 
true}, - {"paramName":"tn", "paramLongName":"targetNameNode", "paramDescription": "namoNode of the target cluster", "paramRequired": true}, - {"paramName":"w", "paramLongName":"workingDirectory", "paramDescription": "working directory", "paramRequired": true}, - {"paramName":"nm", "paramLongName":"distcp_num_maps", "paramDescription": "maximum number of map tasks used in the distcp process", "paramRequired": true}, - {"paramName":"mm", "paramLongName":"distcp_memory_mb", "paramDescription": "memory for distcp action copying actionsets from remote cluster", "paramRequired": true}, - {"paramName":"tt", "paramLongName":"distcp_task_timeout", "paramDescription": "timeout for distcp copying actions from remote cluster", "paramRequired": true}, - {"paramName":"tr", "paramLongName":"transform_only", "paramDescription": "activate tranform-only mode. Only apply transformation step", "paramRequired": true} -] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/scriptFile.scala b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/scriptFile.scala new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/transform_actionsets_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/transform_actionsets_parameters.json deleted file mode 100644 index 6fa10f739..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/transform_actionsets_parameters.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName": "mt", - "paramLongName": "master", - "paramDescription": "should be local or yarn", - "paramRequired": true - }, - { - "paramName": "is", - "paramLongName": "isLookupUrl", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": true - }, - { - "paramName": "i", - "paramLongName": "inputPaths", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": 
true - } -] From 9d028ffe1c91c270482c75d5860e9f0222c01341 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 09:28:55 +0200 Subject: [PATCH 24/31] cleanup --- .../src/main/resources/eu/dnetlib/dhp/oa/graph/scriptFile.scala | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/scriptFile.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/scriptFile.scala b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/scriptFile.scala deleted file mode 100644 index e69de29bb..000000000 From 18f46e47b915a3770e584765bce4e4d281f3668f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 09:34:48 +0200 Subject: [PATCH 25/31] added relations to the graph2hive import workflow --- .../dhp/oa/graph/hive/oozie_app/workflow.xml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml index 2bcbbba5e..8566d7667 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml @@ -102,6 +102,7 @@ + @@ -286,6 +287,32 @@ + + + yarn + cluster + Import table project + eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --inputPath${inputPath}/relation + --hiveDbName${hiveDbName} + --classNameeu.dnetlib.dhp.schema.oaf.Relation + --hiveMetastoreUris${hiveMetastoreUris} + + + + + From 50d6a2ad3c2f6fe19a52ff279749bd5633917fd6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 09:53:37 +0200 Subject: [PATCH 26/31] added output directory removal in the blacklist spark actions; included common global properties in blacklist's workflow.xml --- .../blacklist/PrepareMergedRelationJob.java | 7 ++++ .../SparkRemoveBlacklistedRelationJob.java | 7 +++- .../dhp/blacklist/oozie_app/workflow.xml | 37 +++++++++++-------- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java index 0ef59e8c2..2a46043e2 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java @@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Optional; +import eu.dnetlib.dhp.common.HdfsSupport; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -56,6 +57,7 @@ public class PrepareMergedRelationJob { conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); selectMergesRelations( spark, inputPath, @@ -84,4 +86,9 @@ public class PrepareMergedRelationJob { (MapFunction) value -> OBJECT_MAPPER.readValue(value, Relation.class), Encoders.bean(Relation.class)); } + + private static void removeOutputDir(SparkSession spark, String path) { + HdfsSupport.remove(path, 
spark.sparkContext().hadoopConfiguration()); + } + } diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index 86587bfc9..fe4310217 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Objects; import java.util.Optional; +import eu.dnetlib.dhp.common.HdfsSupport; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -62,6 +63,7 @@ public class SparkRemoveBlacklistedRelationJob { conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); removeBlacklistedRelations( spark, blacklistPath, @@ -69,7 +71,6 @@ public class SparkRemoveBlacklistedRelationJob { outputPath, mergesPath); }); - } private static void removeBlacklistedRelations(SparkSession spark, String blacklistPath, String inputPath, @@ -144,4 +145,8 @@ public class SparkRemoveBlacklistedRelationJob { Encoders.bean(Relation.class)); } + private static void removeOutputDir(SparkSession spark, String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + } diff --git a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml index 1538318c1..dd7827da4 100644 --- a/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-blacklist/src/main/resources/eu/dnetlib/dhp/blacklist/oozie_app/workflow.xml @@ -22,6 +22,25 @@ + + ${jobTracker} + 
${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -49,8 +68,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/publication ${nameNode}/${outputPath}/publication @@ -60,8 +77,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/dataset ${nameNode}/${outputPath}/dataset @@ -71,8 +86,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/otherresearchproduct ${nameNode}/${outputPath}/otherresearchproduct @@ -82,8 +95,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/software ${nameNode}/${outputPath}/software @@ -93,8 +104,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/organization ${nameNode}/${outputPath}/organization @@ -104,8 +113,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/project ${nameNode}/${outputPath}/project @@ -115,8 +122,6 @@ - ${jobTracker} - ${nameNode} ${nameNode}/${sourcePath}/datasource ${nameNode}/${outputPath}/datasource @@ -128,8 +133,6 @@ - ${jobTracker} - ${nameNode} eu.dnetlib.dhp.blacklist.ReadBlacklistFromDB --hdfsPath${workingDir}/blacklist --hdfsNameNode${nameNode} @@ -156,6 +159,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/relation --outputPath${workingDir}/mergesRelation @@ -180,6 +184,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/relation --outputPath${outputPath}/relation From b7e198475a7fdfacf5cdc3ea39d35f58af657d0c Mon Sep 17 00:00:00 2001 From: 
Claudio Atzori Date: Fri, 15 May 2020 10:20:07 +0200 Subject: [PATCH 27/31] added common methods to create HiveDB table identifiers --- .../dhp/schema/common/ModelSupport.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index fc85b1ac1..9ee7c2deb 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -1,10 +1,15 @@ package eu.dnetlib.dhp.schema.common; +import static com.google.common.base.Preconditions.checkArgument; + import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.function.Function; +import org.apache.commons.lang3.StringUtils; + import com.google.common.collect.Maps; import eu.dnetlib.dhp.schema.oaf.*; @@ -379,6 +384,21 @@ public class ModelSupport { entityMapping.get(EntityType.valueOf(targetType)).name()); } + public static String tableIdentifier(String dbName, String tableName) { + + checkArgument(StringUtils.isNotBlank(dbName), "DB name cannot be empty"); + checkArgument(StringUtils.isNotBlank(tableName), "table name cannot be empty"); + + return String.format("%s.%s", dbName, tableName); + } + + public static String tableIdentifier(String dbName, Class clazz) { + + checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null"); + + return tableIdentifier(dbName, clazz.getSimpleName().toLowerCase()); + } + public static Function idFn() { return x -> { if (isSubClass(x, Relation.class)) { From a83265829608c6a318e0a35cbdc7abc95dd9d1b6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 10:21:09 +0200 Subject: [PATCH 28/31] code formatting --- .../dhp/blacklist/PrepareMergedRelationJob.java | 2 +- .../blacklist/SparkRemoveBlacklistedRelationJob.java | 2 +- 
.../java/eu/dnetlib/dhp/PropagationConstant.java | 4 ++-- .../PrepareDatasourceCountryAssociation.java | 2 +- .../PrepareResultOrcidAssociationStep1.java | 5 ++--- .../SparkOrcidToResultFromSemRelJob.java | 8 ++++---- .../PrepareProjectResultsAssociation.java | 2 +- .../SparkResultToProjectThroughSemRelJob.java | 6 +++--- .../PrepareResultCommunitySet.java | 2 +- .../PrepareResultInstRepoAssociation.java | 2 +- .../SparkResultToOrganizationFromIstRepoJob.java | 8 ++++---- .../oa/graph/raw/MigrateDbEntitiesApplication.java | 12 ++++++------ 12 files changed, 27 insertions(+), 28 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java index 2a46043e2..b4bcc509e 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java @@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Optional; -import eu.dnetlib.dhp.common.HdfsSupport; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -19,6 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareMergedRelationJob { diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index fe4310217..92289ec2d 100644 --- 
a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Objects; import java.util.Optional; -import eu.dnetlib.dhp.common.HdfsSupport; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -19,6 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 13ed46508..c8eb017c7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -4,8 +4,6 @@ package eu.dnetlib.dhp; import java.util.List; import java.util.Optional; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -17,6 +15,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; public class PropagationConstant { diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 1d01d35e5..98b573102 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -22,6 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; /** diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 6549d1ed2..b15f813ac 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -95,8 +95,7 @@ public class PrepareResultOrcidAssociationStep1 { Dataset result = readPath(spark, inputResultPath, resultClazz); result.createOrReplaceTempView("result"); - String query = - "SELECT target resultId, author authorList" + String query = "SELECT target resultId, author authorList" + " FROM (SELECT id, collect_set(named_struct('name', 
name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author " + " FROM ( " + " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid " @@ -109,7 +108,7 @@ public class PrepareResultOrcidAssociationStep1 { + " SELECT source, target " + " FROM relation " + " WHERE datainfo.deletedbyinference = false " - + getConstraintList(" relclass = '", allowedsemrel) + + getConstraintList(" relclass = '", allowedsemrel) + " ) rel_rel " + " ON source = id"; spark diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index fd1de3282..bea847ca7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -151,10 +151,10 @@ public class SparkOrcidToResultFromSemRelJob { // in one of the two else { if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { + .getName() + .trim() + .substring(0, 0) + .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { toaddpid = true; } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index 920ef1f06..4cd7f88df 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -8,7 +8,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import 
java.util.Arrays; import java.util.List; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -22,6 +21,7 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareProjectResultsAssociation { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 44a439ab7..1f6264c18 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -9,7 +9,6 @@ import java.util.Iterator; import java.util.List; import java.util.Optional; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -21,6 +20,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -124,7 +124,7 @@ public class SparkResultToProjectThroughSemRelJob { resId, projectId, ModelConstants.IS_PRODUCED_BY, - ModelConstants.RESULT_PROJECT , + ModelConstants.RESULT_PROJECT, ModelConstants.OUTCOME, PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, @@ -135,7 +135,7 @@ public 
class SparkResultToProjectThroughSemRelJob { projectId, resId, ModelConstants.PRODUCES, - ModelConstants.RESULT_PROJECT, + ModelConstants.RESULT_PROJECT, ModelConstants.OUTCOME, PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index fc9f395d1..5574aad75 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -18,6 +17,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareResultCommunitySet { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 0a83e4195..84e40fa88 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -4,7 +4,6 
@@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -18,6 +17,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index fc9a46d72..0ce741b87 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; @@ -20,6 +19,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import scala.Tuple2; @@ -151,7 +151,7 @@ public class SparkResultToOrganizationFromIstRepoJob { getRelation( orgId, resultId, - 
ModelConstants.IS_AUTHOR_INSTITUTION_OF, + ModelConstants.IS_AUTHOR_INSTITUTION_OF, ModelConstants.RESULT_ORGANIZATION, ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE, @@ -163,8 +163,8 @@ public class SparkResultToOrganizationFromIstRepoJob { resultId, orgId, ModelConstants.HAS_AUTHOR_INSTITUTION, - ModelConstants.RESULT_ORGANIZATION, - ModelConstants.AFFILIATION, + ModelConstants.RESULT_ORGANIZATION, + ModelConstants.AFFILIATION, PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index ebe2b703b..5b8296c19 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -453,12 +453,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final Boolean inferred = rs.getBoolean("inferred"); final String trust = rs.getString("trust"); return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - ENTITYREGISTRY_PROVENANCE_ACTION, - trust); + deletedbyinference, + inferenceprovenance, + inferred, + false, + ENTITYREGISTRY_PROVENANCE_ACTION, + trust); } private Qualifier prepareQualifierSplitting(final String s) { From 2a4e68a292d98e261527ab267c054cec9ae3aad9 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 15 May 2020 12:25:37 +0200 Subject: [PATCH 29/31] events recognition --- .../broker/oa/GenerateEventsApplication.java | 22 ++++---- .../EnrichMissingAbstract.java | 3 +- .../EnrichMissingAuthorOrcid.java | 3 +- .../oa/matchers/EnrichMissingOpenAccess.java | 55 +++++++++++++++++++ .../{util => 
matchers}/EnrichMissingPid.java | 19 ++++++- .../EnrichMissingProject.java | 6 +- .../EnrichMissingPublicationDate.java | 3 +- .../oa/matchers/EnrichMissingSubject.java | 53 ++++++++++++++++++ .../oa/matchers/EnrichMoreOpenAccess.java | 53 ++++++++++++++++++ .../oa/{util => matchers}/EnrichMorePid.java | 22 ++++++-- .../{util => matchers}/EnrichMoreSubject.java | 28 +++++++--- .../oa/{util => matchers}/UpdateMatcher.java | 3 +- .../dhp/broker/oa/util/BrokerConstants.java | 7 +++ .../dhp/broker/oa/util/ConversionUtils.java | 36 ++++++++++++ .../oa/util/EnrichMissingOpenAccess.java | 33 ----------- .../broker/oa/util/EnrichMissingSubject.java | 40 -------------- .../broker/oa/util/EnrichMoreOpenAccess.java | 33 ----------- .../dhp/broker/oa/util/UpdateInfo.java | 2 +- 18 files changed, 281 insertions(+), 140 deletions(-) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/{util => matchers}/EnrichMissingAbstract.java (90%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/{util => matchers}/EnrichMissingAuthorOrcid.java (91%) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/{util => matchers}/EnrichMissingPid.java (60%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/{util => matchers}/EnrichMissingProject.java (86%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/{util => matchers}/EnrichMissingPublicationDate.java (89%) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/{util => matchers}/EnrichMorePid.java (50%) rename 
dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/{util => matchers}/EnrichMoreSubject.java (51%) rename dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/{util => matchers}/UpdateMatcher.java (94%) create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java create mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java delete mode 100644 dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index c4c167c13..43ebd6dd8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -17,18 +17,18 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.EventFactory; -import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAbstract; -import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAuthorOrcid; -import eu.dnetlib.dhp.broker.oa.util.EnrichMissingOpenAccess; -import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPid; -import eu.dnetlib.dhp.broker.oa.util.EnrichMissingProject; -import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPublicationDate; -import eu.dnetlib.dhp.broker.oa.util.EnrichMissingSubject; -import 
eu.dnetlib.dhp.broker.oa.util.EnrichMoreOpenAccess; -import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid; -import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid; +import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject; +import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; -import eu.dnetlib.dhp.broker.oa.util.UpdateMatcher; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java similarity index 90% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java index 6b6e35d1d..43cf738f8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAbstract.java @@ -1,11 +1,12 @@ -package eu.dnetlib.dhp.broker.oa.util; +package eu.dnetlib.dhp.broker.oa.matchers; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import eu.dnetlib.dhp.broker.model.Topic; 
+import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; public class EnrichMissingAbstract extends UpdateMatcher { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java similarity index 91% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java index d81427e05..beeccdbe8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingAuthorOrcid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.util; +package eu.dnetlib.dhp.broker.oa.matchers; import java.util.Arrays; import java.util.List; @@ -7,6 +7,7 @@ import java.util.List; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; public class EnrichMissingAuthorOrcid extends UpdateMatcher> { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java new file mode 100644 index 000000000..a4a2ea0c6 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingOpenAccess.java @@ -0,0 +1,55 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import eu.dnetlib.broker.objects.Instance; +import eu.dnetlib.dhp.broker.model.Topic; +import 
eu.dnetlib.dhp.broker.oa.util.BrokerConstants; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMissingOpenAccess extends UpdateMatcher { + + public EnrichMissingOpenAccess() { + super(true); + } + + @Override + protected List> findUpdates(final Result source, final Result target) { + final long count = target + .getInstance() + .stream() + .map(i -> i.getAccessright().getClassid()) + .filter(right -> right.equals(BrokerConstants.OPEN_ACCESS)) + .count(); + + if (count > 0) { + return Arrays.asList(); + } + + return source + .getInstance() + .stream() + .filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS)) + .map(ConversionUtils::oafInstanceToBrokerInstances) + .flatMap(s -> s) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + } + + @Override + public UpdateInfo generateUpdateInfo(final Instance highlightValue, + final Result source, + final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MISSING_OA_VERSION, + highlightValue, source, target, + (p, i) -> p.getInstances().add(i), + Instance::getUrl); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java similarity index 60% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java index 0b4045a0e..a8df62541 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPid.java @@ -1,11 +1,14 @@ -package eu.dnetlib.dhp.broker.oa.util; +package 
eu.dnetlib.dhp.broker.oa.matchers; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; public class EnrichMissingPid extends UpdateMatcher { @@ -16,8 +19,18 @@ public class EnrichMissingPid extends UpdateMatcher { @Override protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); + final long count = target.getPid().size(); + + if (count > 0) { + return Arrays.asList(); + } + + return source + .getPid() + .stream() + .map(ConversionUtils::oafPidToBrokerPid) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java similarity index 86% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java index 45b16801c..b6e5b3b57 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingProject.java @@ -1,11 +1,12 @@ -package eu.dnetlib.dhp.broker.oa.util; +package eu.dnetlib.dhp.broker.oa.matchers; import java.util.Arrays; import java.util.List; import eu.dnetlib.broker.objects.Project; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; public class 
EnrichMissingProject extends UpdateMatcher { @@ -21,7 +22,8 @@ public class EnrichMissingProject extends UpdateMatcher { } @Override - public UpdateInfo generateUpdateInfo(final Project highlightValue, final Result source, + public UpdateInfo generateUpdateInfo(final Project highlightValue, + final Result source, final Result target) { return new UpdateInfo<>( Topic.ENRICH_MISSING_PROJECT, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java similarity index 89% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java index 7fcd2a66f..e9ec082c4 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingPublicationDate.java @@ -1,10 +1,11 @@ -package eu.dnetlib.dhp.broker.oa.util; +package eu.dnetlib.dhp.broker.oa.matchers; import java.util.Arrays; import java.util.List; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; public class EnrichMissingPublicationDate extends UpdateMatcher { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java new file mode 100644 index 000000000..79e9d469b --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMissingSubject.java @@ -0,0 +1,53 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import 
java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class EnrichMissingSubject extends UpdateMatcher> { + + public EnrichMissingSubject() { + super(true); + } + + @Override + protected List>> findUpdates(final Result source, final Result target) { + final Set existingTypes = target + .getSubject() + .stream() + .map(StructuredProperty::getQualifier) + .map(Qualifier::getClassid) + .collect(Collectors.toSet()); + + return source + .getPid() + .stream() + .filter(pid -> !existingTypes.contains(pid.getQualifier().getClassid())) + .map(ConversionUtils::oafSubjectToPair) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + } + + @Override + public UpdateInfo> generateUpdateInfo(final Pair highlightValue, + final Result source, + final Result target) { + + return new UpdateInfo<>( + Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()), + highlightValue, source, target, + (p, pair) -> p.getSubjects().add(pair.getRight()), + pair -> pair.getLeft() + "::" + pair.getRight()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java new file mode 100644 index 000000000..40c9b0500 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreOpenAccess.java @@ -0,0 +1,53 @@ + +package eu.dnetlib.dhp.broker.oa.matchers; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import 
eu.dnetlib.broker.objects.Instance; +import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class EnrichMoreOpenAccess extends UpdateMatcher { + + public EnrichMoreOpenAccess() { + super(true); + } + + @Override + protected List> findUpdates(final Result source, final Result target) { + final Set urls = target + .getInstance() + .stream() + .filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS)) + .map(i -> i.getUrl()) + .flatMap(List::stream) + .collect(Collectors.toSet()); + + return source + .getInstance() + .stream() + .filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS)) + .map(ConversionUtils::oafInstanceToBrokerInstances) + .flatMap(s -> s) + .filter(i -> !urls.contains(i.getUrl())) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); + } + + @Override + public UpdateInfo generateUpdateInfo(final Instance highlightValue, + final Result source, + final Result target) { + return new UpdateInfo<>( + Topic.ENRICH_MORE_OA_VERSION, + highlightValue, source, target, + (p, i) -> p.getInstances().add(i), + Instance::getUrl); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java similarity index 50% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java index 8cd67f553..0e7b7766a 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMorePid.java @@ -1,11 +1,14 @@ -package eu.dnetlib.dhp.broker.oa.util; +package eu.dnetlib.dhp.broker.oa.matchers; -import java.util.Arrays; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; public class EnrichMorePid extends UpdateMatcher { @@ -16,8 +19,19 @@ public class EnrichMorePid extends UpdateMatcher { @Override protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); + final Set existingPids = target + .getPid() + .stream() + .map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue()) + .collect(Collectors.toSet()); + + return source + .getPid() + .stream() + .filter(pid -> !existingPids.contains(pid.getQualifier().getClassid() + "::" + pid.getValue())) + .map(ConversionUtils::oafPidToBrokerPid) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java similarity index 51% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java index 9e0d8e693..e6374479b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/EnrichMoreSubject.java @@ -1,12 
+1,15 @@ -package eu.dnetlib.dhp.broker.oa.util; +package eu.dnetlib.dhp.broker.oa.matchers; -import java.util.Arrays; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Result; public class EnrichMoreSubject extends UpdateMatcher> { @@ -17,18 +20,25 @@ public class EnrichMoreSubject extends UpdateMatcher> { @Override protected List>> findUpdates(final Result source, final Result target) { - // MESHEUROPMC - // ARXIV - // JEL - // DDC - // ACM + final Set existingSubjects = target + .getSubject() + .stream() + .map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue()) + .collect(Collectors.toSet()); - return Arrays.asList(); + return source + .getPid() + .stream() + .filter(pid -> !existingSubjects.contains(pid.getQualifier().getClassid() + "::" + pid.getValue())) + .map(ConversionUtils::oafSubjectToPair) + .map(i -> generateUpdateInfo(i, source, target)) + .collect(Collectors.toList()); } @Override public UpdateInfo> generateUpdateInfo(final Pair highlightValue, - final Result source, final Result target) { + final Result source, + final Result target) { return new UpdateInfo<>( Topic.fromPath("ENRICH/MORE/SUBJECT/" + highlightValue.getLeft()), diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java similarity index 94% rename from dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateMatcher.java rename to dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index 3fd6d4027..b8b6132cd 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.broker.oa.util; +package eu.dnetlib.dhp.broker.oa.matchers; import java.util.Arrays; import java.util.Collection; @@ -10,6 +10,7 @@ import java.util.Map; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java new file mode 100644 index 000000000..d61d5bfb7 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.broker.oa.util; + +public class BrokerConstants { + + public final static String OPEN_ACCESS = "OPEN"; +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java new file mode 100644 index 000000000..2e2ce202a --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -0,0 +1,36 @@ + +package eu.dnetlib.dhp.broker.oa.util; + +import java.util.stream.Stream; + +import org.apache.commons.lang3.tuple.Pair; + +import eu.dnetlib.broker.objects.Instance; +import eu.dnetlib.broker.objects.Pid; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class ConversionUtils { + + public static Stream oafInstanceToBrokerInstances(final eu.dnetlib.dhp.schema.oaf.Instance i) { + return i.getUrl().stream().map(url -> { + final Instance r = 
new Instance(); + r.setUrl(url); + r.setInstancetype(i.getInstancetype().getClassid()); + r.setLicense(BrokerConstants.OPEN_ACCESS); + r.setHostedby(i.getHostedby().getValue()); + return r; + }); + } + + public static Pid oafPidToBrokerPid(final StructuredProperty sp) { + final Pid pid = new Pid(); + pid.setValue(sp.getValue()); + pid.setType(sp.getQualifier().getClassid()); + return pid; + } + + public static final Pair oafSubjectToPair(final StructuredProperty sp) { + return Pair.of(sp.getQualifier().getClassid(), sp.getValue()); + } + +} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java deleted file mode 100644 index 9079ee24b..000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java +++ /dev/null @@ -1,33 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.util; - -import java.util.Arrays; -import java.util.List; - -import eu.dnetlib.broker.objects.Instance; -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingOpenAccess extends UpdateMatcher { - - public EnrichMissingOpenAccess() { - super(true); - } - - @Override - protected List> findUpdates(final Result source, final Result target) { - - return Arrays.asList(); - } - - @Override - public UpdateInfo generateUpdateInfo(final Instance highlightValue, final Result source, - final Result target) { - return new UpdateInfo<>( - Topic.ENRICH_MISSING_OA_VERSION, - highlightValue, source, target, - (p, i) -> p.getInstances().add(i), - Instance::getUrl); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java deleted file mode 100644 index 4470bd9d9..000000000 
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java +++ /dev/null @@ -1,40 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.util; - -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.lang3.tuple.Pair; - -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMissingSubject extends UpdateMatcher> { - - public EnrichMissingSubject() { - super(true); - } - - @Override - protected List>> findUpdates(final Result source, final Result target) { - // MESHEUROPMC - // ARXIV - // JEL - // DDC - // ACM - - return Arrays.asList(); - } - - @Override - public UpdateInfo> generateUpdateInfo(final Pair highlightValue, - final Result source, final Result target) { - - return new UpdateInfo<>( - Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()), - highlightValue, source, target, - (p, pair) -> p.getSubjects().add(pair.getRight()), - pair -> pair.getLeft() + "::" + pair.getRight()); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java deleted file mode 100644 index bc37ce659..000000000 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java +++ /dev/null @@ -1,33 +0,0 @@ - -package eu.dnetlib.dhp.broker.oa.util; - -import java.util.Arrays; -import java.util.List; - -import eu.dnetlib.broker.objects.Instance; -import eu.dnetlib.dhp.broker.model.Topic; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class EnrichMoreOpenAccess extends UpdateMatcher { - - public EnrichMoreOpenAccess() { - super(true); - } - - @Override - protected List> findUpdates(final Result source, final Result target) { - // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); - return Arrays.asList(); - } - - @Override - public 
UpdateInfo generateUpdateInfo(final Instance highlightValue, final Result source, - final Result target) { - return new UpdateInfo<>( - Topic.ENRICH_MORE_OA_VERSION, - highlightValue, source, target, - (p, i) -> p.getInstances().add(i), - Instance::getUrl); - } - -} diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 1dfc14e5e..5cc0d371d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -25,7 +25,7 @@ public final class UpdateInfo { private final float trust; - protected UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target, + public UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target, final BiConsumer compileHighlight, final Function highlightToString) { this.topic = topic; From cfc8948717b4f5da506af7fc3c0a3d230ecb4c69 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 12:26:16 +0200 Subject: [PATCH 30/31] fixed mapping OdfToGraph: pick the correct element to map author pids and author affiliations; extended mapping Oaf2Graph: added support for author pids --- .../dhp/oa/graph/raw/OafToOafMapper.java | 20 +++++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 8 +-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 55 +++++++++++++++++-- .../dnetlib/dhp/oa/graph/raw/oaf_record.xml | 2 +- .../dnetlib/dhp/oa/graph/raw/odf_dataset.xml | 3 +- 5 files changed, 71 insertions(+), 17 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 891fee57e..6b6aa15e8 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -1,15 +1,16 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; +import org.dom4j.Element; import org.dom4j.Node; import com.google.common.collect.Lists; @@ -28,15 +29,24 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final List res = new ArrayList<>(); int pos = 1; for (final Object o : doc.selectNodes("//dc:creator")) { - final Node n = (Node) o; + final Element e = (Element) o; final Author author = new Author(); - author.setFullname(n.getText()); + author.setFullname(e.getText()); author.setRank(pos++); - final PacePerson p = new PacePerson(n.getText(), false); + final PacePerson p = new PacePerson(e.getText(), false); if (p.isAccurate()) { author.setName(p.getNormalisedFirstName()); author.setSurname(p.getNormalisedSurname()); } + + final String pid = e.attributeValue("nameIdentifier"); + final String pidType = e.attributeValue("nameIdentifierScheme"); + + if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) { + author.setPid(new ArrayList<>()); + author.getPid().add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); + } + res.add(author); } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 5baac12fd..30b980c42 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -63,17 +63,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setSurname(surname); } - author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info)); - author.setPid(preparePids(doc, info)); + author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); + author.setPid(preparePids(n, info)); author.setRank(pos++); res.add(author); } return res; } - private List preparePids(final Document doc, final DataInfo info) { + private List preparePids(final Node n, final DataInfo info) { final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) { + for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { res .add( structuredProperty( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 951c97d9d..d7635c9ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -10,7 +10,10 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -19,12 +22,6 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import 
org.mockito.junit.jupiter.MockitoExtension; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; - @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -54,7 +51,26 @@ public class MappersTest { assertValidId(p.getId()); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + assertTrue(p.getAuthor().size() > 0); + Optional author = p.getAuthor() + .stream() + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) + .findFirst(); + assertTrue(author.isPresent()); + StructuredProperty pid = author.get().getPid() + .stream() + .findFirst() + .get(); + assertEquals("0000-0001-6651-1178", pid.getValue()); + assertEquals("ORCID", pid.getQualifier().getClassid()); + assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); + assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); + assertEquals("Votsi,Nefta", author.get().getFullname()); + assertEquals("Votsi", author.get().getSurname()); + assertEquals("Nefta", author.get().getName()); + assertTrue(p.getSubject().size() > 0); assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline())); assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); @@ -100,6 +116,33 @@ public class MappersTest { assertValidId(d.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); + + Optional author = d.getAuthor() + .stream() + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) + .findFirst(); + assertTrue(author.isPresent()); + StructuredProperty pid = author.get().getPid() + .stream() + .findFirst() + .get(); + assertEquals("0000-0001-9074-1619", pid.getValue()); + assertEquals("ORCID", 
pid.getQualifier().getClassid()); + assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); + assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); + assertEquals("Baracchini, Theo", author.get().getFullname()); + assertEquals("Baracchini", author.get().getSurname()); + assertEquals("Theo", author.get().getName()); + + assertEquals(1, author.get().getAffiliation().size()); + Optional> opAff = author.get().getAffiliation() + .stream() + .findFirst(); + assertTrue(opAff.isPresent()); + Field affiliation = opAff.get(); + assertEquals("ISTI-CNR", affiliation.getValue()); + assertTrue(d.getSubject().size() > 0); assertTrue(d.getInstance().size() > 0); assertTrue(d.getContext().size() > 0); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml index e898d4434..2cb0ba1c7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml @@ -19,7 +19,7 @@ Ecosystem Service capacity is higher in areas of multiple designation types Nikolaidou,Charitini - Votsi,Nefta + Votsi,Nefta Sgardelis,Steanos Halley,John Pantis,John diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml index 94dc802fa..88ae9d106 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml @@ -35,9 +35,10 @@ Baracchini, Theo + 0000-0001-9074-1619 Theo Baracchini - Physics of Aquatic Systems Laboratory (APHYS) – Margaretha 
Kamprad Chair, ENAC, EPFL, Lausanne, 1015, Switzerland + ISTI-CNR Wüest, Alfred From 7a89507ab1b1f347cdcfb46ffa8a908072aed057 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 15:16:54 +0200 Subject: [PATCH 31/31] code formatting --- .../dhp/oa/graph/raw/OafToOafMapper.java | 6 ++- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 51 +++++++++++-------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 6b6aa15e8..54594cb80 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Element; @@ -16,6 +15,7 @@ import org.dom4j.Node; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; public class OafToOafMapper extends AbstractMdRecordToOafMapper { @@ -44,7 +44,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) { author.setPid(new ArrayList<>()); - author.getPid().add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); + author + .getPid() + .add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); } res.add(author); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index d7635c9ea..5a006e351 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -12,8 +12,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -22,6 +20,9 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; + @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -53,15 +54,18 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(p.getAuthor().size() > 0); - Optional<Author> author = p.getAuthor() - .stream() - .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) - .findFirst(); + Optional<Author> author = p + .getAuthor() + .stream() + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) + .findFirst(); assertTrue(author.isPresent()); - StructuredProperty pid = author.get().getPid() - .stream() - .findFirst() - .get(); + StructuredProperty pid = author + .get() + .getPid() + .stream() + .findFirst() + .get(); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals("ORCID", pid.getQualifier().getClassid()); assertEquals("ORCID", pid.getQualifier().getClassname()); @@ -117,15 +121,18 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); - Optional<Author> author = d.getAuthor() - .stream() - .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) - .findFirst(); + Optional<Author> author = d +
.getAuthor() + .stream() + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) + .findFirst(); assertTrue(author.isPresent()); - StructuredProperty pid = author.get().getPid() - .stream() - .findFirst() - .get(); + StructuredProperty pid = author + .get() + .getPid() + .stream() + .findFirst() + .get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals("ORCID", pid.getQualifier().getClassid()); assertEquals("ORCID", pid.getQualifier().getClassname()); @@ -136,9 +143,11 @@ public class MappersTest { assertEquals("Theo", author.get().getName()); assertEquals(1, author.get().getAffiliation().size()); - Optional<Field<String>> opAff = author.get().getAffiliation() - .stream() - .findFirst(); + Optional<Field<String>> opAff = author + .get() + .getAffiliation() + .stream() + .findFirst(); assertTrue(opAff.isPresent()); Field<String> affiliation = opAff.get(); assertEquals("ISTI-CNR", affiliation.getValue());