From f072ed91b20502b9fea3151439a8d02add526b7a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 16 Jan 2018 14:21:13 +0100 Subject: [PATCH] first commit --- README.md | 1 + .../README.markdown | 7 + .../dhp-build-assembly-resources/pom.xml | 24 + .../resources/assemblies/oozie-installer.xml | 32 + .../src/main/resources/assemblies/tests.xml | 24 + .../resources/commands/get_working_dir.sh | 3 + .../resources/commands/print_working_dir.sh | 5 + .../main/resources/commands/readme.markdown | 5 + .../main/resources/commands/run_workflow.sh | 10 + .../resources/commands/upload_workflow.sh | 34 + .../main/resources/project-default.properties | 7 + .../classes/assemblies/oozie-installer.xml | 32 + .../target/classes/assemblies/tests.xml | 24 + .../classes/commands/get_working_dir.sh | 3 + .../classes/commands/print_working_dir.sh | 5 + .../target/classes/commands/readme.markdown | 5 + .../target/classes/commands/run_workflow.sh | 10 + .../classes/commands/upload_workflow.sh | 34 + .../target/classes/project-default.properties | 7 + .../README.markdown | 6 + .../dhp-build-properties-maven-plugin/pom.xml | 68 ++ .../GenerateOoziePropertiesMojo.java | 71 ++ .../WritePredefinedProjectProperties.java | 436 ++++++++ .../GenerateOoziePropertiesMojoTest.java | 101 ++ .../WritePredefinedProjectPropertiesTest.java | 365 +++++++ .../plugin/properties/included.properties | 1 + .../target/classes/META-INF/maven/plugin.xml | 281 ++++++ .../GenerateOoziePropertiesMojo.class | Bin 0 -> 2814 bytes .../WritePredefinedProjectProperties.class | Bin 0 -> 10384 bytes .../compile/default-compile/createdFiles.lst | 2 + .../compile/default-compile/inputFiles.lst | 2 + dhp-build/pom.xml | 16 + dhp-common/pom.xml | 177 ++++ .../dhp/common/FsShellPermissions.java | 106 ++ .../dhp/common/InfoSpaceConstants.java | 75 ++ .../dhp/common/WorkflowRuntimeParameters.java | 74 ++ .../dhp/common/counter/NamedCounters.java | 111 ++ .../NamedCountersAccumulableParam.java | 48 + 
.../counter/NamedCountersFileWriter.java | 52 + .../dnetlib/dhp/common/fault/FaultUtils.java | 67 ++ .../dhp/common/java/CmdLineParser.java | 98 ++ .../common/java/CmdLineParserException.java | 21 + .../CmdLineParserForProcessConstruction.java | 45 + .../CmdLineParserForProcessRunParameters.java | 100 ++ .../dnetlib/dhp/common/java/PortBindings.java | 43 + .../eu/dnetlib/dhp/common/java/Ports.java | 27 + .../eu/dnetlib/dhp/common/java/Process.java | 43 + .../dhp/common/java/ProcessException.java | 20 + .../dhp/common/java/ProcessParameters.java | 43 + .../dnetlib/dhp/common/java/ProcessUtils.java | 43 + .../dhp/common/java/ProcessWrapper.java | 88 ++ .../common/java/io/AvroDataStoreReader.java | 156 +++ .../dhp/common/java/io/CloseableIterator.java | 25 + .../dhp/common/java/io/CountingIterator.java | 19 + .../dnetlib/dhp/common/java/io/DataStore.java | 172 ++++ .../dhp/common/java/io/FileSystemPath.java | 61 ++ .../dnetlib/dhp/common/java/io/HdfsUtils.java | 37 + .../java/io/SequenceFileTextValueReader.java | 159 +++ .../ClassPathResourceToHdfsCopier.java | 54 + .../jsonworkflownodes/PortSpecifications.java | 66 ++ .../StringPortSpecificationExtractor.java | 89 ++ .../dhp/common/java/porttype/AnyPortType.java | 20 + .../common/java/porttype/AvroPortType.java | 65 ++ .../dhp/common/java/porttype/PortType.java | 33 + .../dhp/common/lock/LockManagingProcess.java | 149 +++ .../dhp/common/oozie/OozieClientFactory.java | 24 + .../property/ConditionalPropertySetter.java | 76 ++ .../protobuf/AvroToProtoBufConverter.java | 12 + .../AvroToProtoBufOneToOneMapper.java | 62 ++ .../dhp/common/report/ReportEntryFactory.java | 32 + .../dhp/common/report/ReportGenerator.java | 110 ++ .../common/spark/pipe/SparkPipeExecutor.java | 74 ++ .../dhp/common/string/CharSequenceUtils.java | 31 + .../dhp/common/string/DiacriticsRemover.java | 113 +++ .../LenientComparisonStringNormalizer.java | 130 +++ .../dhp/common/string/StringNormalizer.java | 16 + 
.../dhp/common/utils/AvroGsonFactory.java | 45 + .../dnetlib/dhp/common/utils/AvroUtils.java | 77 ++ .../dhp/common/utils/ByteArrayUtils.java | 45 + .../utils/EmptyDatastoreVerifierProcess.java | 89 ++ dhp-schemas/README.md | 3 + dhp-schemas/pom.xml | 62 ++ .../main/avro/eu/dnetlib/dhp/audit/Fault.avdl | 29 + .../eu/dnetlib/dhp/common/ReportEntry.avdl | 16 + .../eu/dnetlib/dhp/importer/NativeRecord.avdl | 21 + dhp-wf/dhp-wf-import/pom.xml | 105 ++ .../wf/importer/DataFileRecordReceiver.java | 29 + .../DataFileRecordReceiverWithCounter.java | 50 + .../ImportWorkflowRuntimeParameters.java | 52 + .../dhp/wf/importer/RecordReceiver.java | 14 + ...bstractResultSetAwareWebServiceFacade.java | 104 ++ .../wf/importer/facade/ISLookupFacade.java | 17 + .../dhp/wf/importer/facade/MDStoreFacade.java | 17 + .../wf/importer/facade/ObjectStoreFacade.java | 19 + .../facade/ServiceFacadeException.java | 27 + .../importer/facade/ServiceFacadeFactory.java | 20 + .../importer/facade/ServiceFacadeUtils.java | 80 ++ .../facade/WebServiceISLookupFacade.java | 55 + .../WebServiceISLookupFacadeFactory.java | 45 + .../facade/WebServiceMDStoreFacade.java | 52 + .../WebServiceMDStoreFacadeFactory.java | 45 + .../facade/WebServiceObjectStoreFacade.java | 52 + .../WebServiceObjectStoreFacadeFactory.java | 44 + .../wf/importer/mdrecord/MDRecordHandler.java | 94 ++ .../mdrecord/MDStoreRecordsImporter.java | 157 +++ .../mdrecord/MongoRecordImporter.java | 48 + .../importer/mdrecord/oozie_app/workflow.xml | 124 +++ .../wf/importer/DataFileRecordReceiver.class | Bin 0 -> 1161 bytes .../DataFileRecordReceiverWithCounter.class | Bin 0 -> 1251 bytes .../ImportWorkflowRuntimeParameters.class | Bin 0 -> 2706 bytes .../dhp/wf/importer/RecordReceiver.class | Bin 0 -> 306 bytes ...stractResultSetAwareWebServiceFacade.class | Bin 0 -> 3431 bytes .../wf/importer/facade/ISLookupFacade.class | Bin 0 -> 378 bytes .../wf/importer/facade/MDStoreFacade.class | Bin 0 -> 379 bytes 
.../importer/facade/ObjectStoreFacade.class | Bin 0 -> 389 bytes .../facade/ServiceFacadeException.class | Bin 0 -> 777 bytes .../facade/ServiceFacadeFactory.class | Bin 0 -> 347 bytes .../importer/facade/ServiceFacadeUtils.class | Bin 0 -> 3747 bytes .../facade/WebServiceISLookupFacade.class | Bin 0 -> 2544 bytes .../WebServiceISLookupFacadeFactory.class | Bin 0 -> 2265 bytes .../facade/WebServiceMDStoreFacade.class | Bin 0 -> 2671 bytes .../WebServiceMDStoreFacadeFactory.class | Bin 0 -> 2276 bytes .../facade/WebServiceObjectStoreFacade.class | Bin 0 -> 2839 bytes .../WebServiceObjectStoreFacadeFactory.class | Bin 0 -> 2314 bytes .../importer/mdrecord/MDRecordHandler.class | Bin 0 -> 2610 bytes .../mdrecord/MDStoreRecordsImporter.class | Bin 0 -> 9081 bytes .../mdrecord/MongoRecordImporter.class | Bin 0 -> 1523 bytes .../importer/mdrecord/oozie_app/workflow.xml | 124 +++ .../compile/default-compile/createdFiles.lst | 20 + .../compile/default-compile/inputFiles.lst | 20 + dhp-wf/pom.xml | 249 +++++ pom.xml | 953 ++++++++++++++++++ 132 files changed, 7690 insertions(+) create mode 100644 README.md create mode 100644 dhp-build/dhp-build-assembly-resources/README.markdown create mode 100644 dhp-build/dhp-build-assembly-resources/pom.xml create mode 100644 dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml create mode 100644 dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml create mode 100644 dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh create mode 100644 dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh create mode 100644 dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown create mode 100644 dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh create mode 100644 dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh create mode 
100644 dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties create mode 100644 dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml create mode 100644 dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml create mode 100644 dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh create mode 100644 dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh create mode 100644 dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown create mode 100644 dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh create mode 100644 dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh create mode 100644 dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties create mode 100644 dhp-build/dhp-build-properties-maven-plugin/README.markdown create mode 100644 dhp-build/dhp-build-properties-maven-plugin/pom.xml create mode 100644 dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java create mode 100644 dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java create mode 100644 dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java create mode 100644 dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java create mode 100644 dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties create mode 100644 dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml create mode 100644 
dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class create mode 100644 dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class create mode 100644 dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst create mode 100644 dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst create mode 100644 dhp-build/pom.xml create mode 100644 dhp-common/pom.xml create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java create mode 
100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/FileSystemPath.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/HdfsUtils.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/SequenceFileTextValueReader.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java create mode 
100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java create mode 100644 dhp-schemas/README.md create mode 100644 dhp-schemas/pom.xml create mode 100644 dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl create mode 100644 dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl create mode 100644 dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl create mode 100644 dhp-wf/dhp-wf-import/pom.xml create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java create 
mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java create mode 100644 dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java create mode 100644 
dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class create mode 
100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class create mode 100644 dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml create mode 100644 dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst create mode 100644 dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst create mode 100644 dhp-wf/pom.xml create mode 100644 pom.xml diff --git a/README.md b/README.md new file mode 100644 index 000000000..1a13ebf05 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# dnet-hadoop diff --git a/dhp-build/dhp-build-assembly-resources/README.markdown b/dhp-build/dhp-build-assembly-resources/README.markdown new file mode 100644 index 000000000..efee5fa45 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/README.markdown @@ -0,0 +1,7 @@ +Module utilized by `dhp-wf`. 
+ +Contains all required resources by this parent module: + +* assembly XML definitions +* build shell scripts +* oozie package commands for uploading, running and monitoring oozie workflows diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml new file mode 100644 index 000000000..2d2543505 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -0,0 +1,24 @@ + + + + 4.0.0 + + + eu.dnetlib.dhp + dhp-build + 1.0.0-SNAPSHOT + + + dhp-build-assembly-resources + jar + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml new file mode 100644 index 000000000..1419c5b1c --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml @@ -0,0 +1,32 @@ + + + oozie-installer + + dir + + + + + true + ${project.build.directory}/assembly-resources/commands + + / + + **/* + + 0755 + unix + + + / + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml new file mode 100644 index 000000000..bf679e652 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml @@ -0,0 +1,24 @@ + + + tests + + jar + + false + + + ${project.build.testOutputDirectory} + + + + + + \ No newline at end of file diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh new file mode 100644 index 000000000..e9d55f0d7 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh @@ -0,0 +1,3 @@ +#!/bin/bash +hadoop fs -get ${workingDir} + diff --git 
a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh new file mode 100644 index 000000000..c79839ea4 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh @@ -0,0 +1,5 @@ +#!/bin/bash +echo "" +echo "---->Contents of the working directory" +hadoop fs -ls ${workingDir} + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown new file mode 100644 index 000000000..3e049c18b --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown @@ -0,0 +1,5 @@ +Execute the scripts in the following order: + +1. `upload_workflow.sh` +2. `run_workflow.sh` +3. `print_working_dir.sh` or `get_working_dir.sh` diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh new file mode 100644 index 000000000..fee3d7737 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +if [ $# = 0 ] ; then + oozie job -oozie ${oozieServiceLoc} -config job.properties -run +else + oozie job -oozie ${oozieServiceLoc} -config $1/job.properties -run +fi + + + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh new file mode 100644 index 000000000..c5d299c2f --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh @@ -0,0 +1,34 @@ +#!/bin/bash +exec 3>&1 +BASH_XTRACEFD=3 +set -x ## print every executed command + + +if [ $# = 0 ] ; then + target_dir_root=`pwd`'/${oozieAppDir}' +else + 
target_dir_root=`readlink -f $1`'/${oozieAppDir}' +fi + +# initial phase, creating symbolic links to jars in all subworkflows +# currently disabled +#libDir=$target_dir_root'/lib' +#dirs=`find $target_dir_root/* -maxdepth 10 -type d` +#for dir in $dirs +#do +# if [ -f $dir/workflow.xml ] +# then +# echo "creating symbolic links to jars in directory: $dir/lib" +# if [ ! -d "$dir/lib" ]; then +# mkdir $dir/lib +# fi +# find $libDir -type f -exec ln -s \{\} $dir/lib \; +# fi +#done + + +#uploading +hadoop fs -rm -r ${sandboxDir} +hadoop fs -mkdir -p ${sandboxDir} +hadoop fs -mkdir -p ${workingDir} +hadoop fs -put $target_dir_root ${sandboxDir} diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties b/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties new file mode 100644 index 000000000..021ecf55b --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties @@ -0,0 +1,7 @@ +#sandboxName when not provided explicitly will be generated +sandboxName=${sandboxName} +sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName} +workingDir=${sandboxDir}/working_dir +oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir} +oozieTopWfApplicationPath = ${oozie.wf.application.path} + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml new file mode 100644 index 000000000..1419c5b1c --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml @@ -0,0 +1,32 @@ + + + oozie-installer + + dir + + + + + true + ${project.build.directory}/assembly-resources/commands + + / + + **/* + + 0755 + unix + + + / + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml new file 
mode 100644 index 000000000..bf679e652 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml @@ -0,0 +1,24 @@ + + + tests + + jar + + false + + + ${project.build.testOutputDirectory} + + + + + + \ No newline at end of file diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh new file mode 100644 index 000000000..e9d55f0d7 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh @@ -0,0 +1,3 @@ +#!/bin/bash +hadoop fs -get ${workingDir} + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh new file mode 100644 index 000000000..c79839ea4 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh @@ -0,0 +1,5 @@ +#!/bin/bash +echo "" +echo "---->Contents of the working directory" +hadoop fs -ls ${workingDir} + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown b/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown new file mode 100644 index 000000000..3e049c18b --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown @@ -0,0 +1,5 @@ +Execute the scripts in the following order: + +1. `upload_workflow.sh` +2. `run_workflow.sh` +3. 
`print_working_dir.sh` or `get_working_dir.sh` diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh new file mode 100644 index 000000000..fee3d7737 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +if [ $# = 0 ] ; then + oozie job -oozie ${oozieServiceLoc} -config job.properties -run +else + oozie job -oozie ${oozieServiceLoc} -config $1/job.properties -run +fi + + + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh new file mode 100644 index 000000000..c5d299c2f --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh @@ -0,0 +1,34 @@ +#!/bin/bash +exec 3>&1 +BASH_XTRACEFD=3 +set -x ## print every executed command + + +if [ $# = 0 ] ; then + target_dir_root=`pwd`'/${oozieAppDir}' +else + target_dir_root=`readlink -f $1`'/${oozieAppDir}' +fi + +# initial phase, creating symbolic links to jars in all subworkflows +# currently disabled +#libDir=$target_dir_root'/lib' +#dirs=`find $target_dir_root/* -maxdepth 10 -type d` +#for dir in $dirs +#do +# if [ -f $dir/workflow.xml ] +# then +# echo "creating symbolic links to jars in directory: $dir/lib" +# if [ ! 
-d "$dir/lib" ]; then +# mkdir $dir/lib +# fi +# find $libDir -type f -exec ln -s \{\} $dir/lib \; +# fi +#done + + +#uploading +hadoop fs -rm -r ${sandboxDir} +hadoop fs -mkdir -p ${sandboxDir} +hadoop fs -mkdir -p ${workingDir} +hadoop fs -put $target_dir_root ${sandboxDir} diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties b/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties new file mode 100644 index 000000000..021ecf55b --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties @@ -0,0 +1,7 @@ +#sandboxName when not provided explicitly will be generated +sandboxName=${sandboxName} +sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName} +workingDir=${sandboxDir}/working_dir +oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir} +oozieTopWfApplicationPath = ${oozie.wf.application.path} + diff --git a/dhp-build/dhp-build-properties-maven-plugin/README.markdown b/dhp-build/dhp-build-properties-maven-plugin/README.markdown new file mode 100644 index 000000000..f99c7c1b0 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/README.markdown @@ -0,0 +1,6 @@ +Maven plugin module utilized by `dhp-wf` for proper `job.properties` file building. + +It is based on http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html and supplemented with: + +* handling includePropertyKeysFromFiles property allowing writing only properties listed in given property files +As a final outcome only properties listed in `` element and listed as a keys in files from `` element will be written to output file. 
diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml new file mode 100644 index 000000000..38093f4d1 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -0,0 +1,68 @@ + + + + 4.0.0 + + + eu.dnetlib.dhp + dhp-build + 1.0.0-SNAPSHOT + + + dhp-build-properties-maven-plugin + maven-plugin + + + + + org.apache.maven + maven-plugin-api + 2.0 + + + org.apache.maven + maven-project + 2.0 + + + org.kuali.maven.plugins + properties-maven-plugin + 1.3.2 + + + + + + target + target/classes + ${project.artifactId}-${project.version} + target/test-classes + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + verify + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + true + + + + + + diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java new file mode 100644 index 000000000..a3a99cc0c --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java @@ -0,0 +1,71 @@ +package eu.dnetlib.maven.plugin.properties; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.maven.plugin.AbstractMojo; +import org.apache.maven.plugin.MojoExecutionException; +import org.apache.maven.plugin.MojoFailureException; + +/** + * Generates oozie properties which were not provided from commandline. 
+ * @author mhorst + * + * @goal generate-properties + */ +public class GenerateOoziePropertiesMojo extends AbstractMojo { + + public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir"; + public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName"; + + private final String[] limiters = {"iis", "dnetlib", "eu", "dhp"}; + + @Override + public void execute() throws MojoExecutionException, MojoFailureException { + if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && + !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { + String generatedSandboxName = generateSandboxName(System.getProperties().getProperty( + PROPERTY_NAME_WF_SOURCE_DIR)); + if (generatedSandboxName!=null) { + System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME, + generatedSandboxName); + } else { + System.out.println("unable to generate sandbox name from path: " + + System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR)); + } + } + } + + /** + * Generates sandbox name from workflow source directory. + * @param wfSourceDir + * @return generated sandbox name + */ + private String generateSandboxName(String wfSourceDir) { +// utilize all dir names until finding one of the limiters + List sandboxNameParts = new ArrayList(); + String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); + ArrayUtils.reverse(tokens); + if (tokens.length>0) { + for (String token : tokens) { + for (String limiter : limiters) { + if (limiter.equals(token)) { + return sandboxNameParts.size()>0? 
+ StringUtils.join(sandboxNameParts.toArray()):null; + } + } + if (sandboxNameParts.size()>0) { + sandboxNameParts.add(0, File.separator); + } + sandboxNameParts.add(0, token); + } + return StringUtils.join(sandboxNameParts.toArray()); + } else { + return null; + } + } + +} diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java new file mode 100644 index 000000000..62f04761a --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java @@ -0,0 +1,436 @@ +/** + * + * Licensed under the Educational Community License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.opensource.org/licenses/ecl2.php + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package eu.dnetlib.maven.plugin.properties; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.Map.Entry; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.maven.plugin.AbstractMojo; +import org.apache.maven.plugin.MojoExecutionException; +import org.apache.maven.plugin.MojoFailureException; +import org.apache.maven.project.MavenProject; +import org.springframework.core.io.DefaultResourceLoader; +import org.springframework.core.io.Resource; +import org.springframework.core.io.ResourceLoader; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; + +/** + * Writes project properties for the keys listed in specified properties files. 
+ * Based on: + * http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html + + * @author mhorst + * @goal write-project-properties + */ +public class WritePredefinedProjectProperties extends AbstractMojo { + + private static final String CR = "\r"; + private static final String LF = "\n"; + private static final String TAB = "\t"; + protected static final String PROPERTY_PREFIX_ENV = "env."; + private static final String ENCODING_UTF8 = "utf8"; + + /** + * @parameter property="properties.includePropertyKeysFromFiles" + */ + private String[] includePropertyKeysFromFiles; + + /** + * @parameter default-value="${project}" + * @required + * @readonly + */ + protected MavenProject project; + + /** + * The file that properties will be written to + * + * @parameter property="properties.outputFile" + * default-value="${project.build.directory}/properties/project.properties"; + * @required + */ + protected File outputFile; + + /** + * If true, the plugin will silently ignore any non-existent properties files, and the build will continue + * + * @parameter property="properties.quiet" default-value="true" + */ + private boolean quiet; + + /** + * Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed, + * tab=tab. Any other values are taken literally. + * + * @parameter default-value="cr,lf,tab" property="properties.escapeChars" + */ + private String escapeChars; + + /** + * If true, the plugin will include system properties when writing the properties file. System properties override + * both environment variables and project properties. + * + * @parameter default-value="false" property="properties.includeSystemProperties" + */ + private boolean includeSystemProperties; + + /** + * If true, the plugin will include environment variables when writing the properties file. Environment variables + * are prefixed with "env". Environment variables override project properties. 
+ * + * @parameter default-value="false" property="properties.includeEnvironmentVariables" + */ + private boolean includeEnvironmentVariables; + + /** + * Comma separated set of properties to exclude when writing the properties file + * + * @parameter property="properties.exclude" + */ + private String exclude; + + /** + * Comma separated set of properties to write to the properties file. If provided, only the properties matching + * those supplied here will be written to the properties file. + * + * @parameter property="properties.include" + */ + private String include; + + /* (non-Javadoc) + * @see org.apache.maven.plugin.AbstractMojo#execute() + */ + @Override + @SuppressFBWarnings({"NP_UNWRITTEN_FIELD","UWF_UNWRITTEN_FIELD"}) + public void execute() throws MojoExecutionException, MojoFailureException { + Properties properties = new Properties(); + // Add project properties + properties.putAll(project.getProperties()); + if (includeEnvironmentVariables) { + // Add environment variables, overriding any existing properties with the same key + properties.putAll(getEnvironmentVariables()); + } + if (includeSystemProperties) { + // Add system properties, overriding any existing properties with the same key + properties.putAll(System.getProperties()); + } + + // Remove properties as appropriate + trim(properties, exclude, include); + + String comment = "# " + new Date() + "\n"; + List escapeTokens = getEscapeChars(escapeChars); + + getLog().info("Creating " + outputFile); + writeProperties(outputFile, comment, properties, escapeTokens); + } + + /** + * Provides environment variables. + * @return environment variables + */ + protected static Properties getEnvironmentVariables() { + Properties props = new Properties(); + for (Entry entry : System.getenv().entrySet()) { + props.setProperty(PROPERTY_PREFIX_ENV + entry.getKey(), entry.getValue()); + } + return props; + } + + /** + * Removes properties which should not be written. 
+ * @param properties + * @param omitCSV + * @param includeCSV + * @throws MojoExecutionException + */ + protected void trim(Properties properties, String omitCSV, String includeCSV) throws MojoExecutionException { + List omitKeys = getListFromCSV(omitCSV); + for (String key : omitKeys) { + properties.remove(key); + } + + List includeKeys = getListFromCSV(includeCSV); +// mh: including keys from predefined properties + if (includePropertyKeysFromFiles!=null && includePropertyKeysFromFiles.length>0) { + for (String currentIncludeLoc : includePropertyKeysFromFiles) { + if (validate(currentIncludeLoc)) { + Properties p = getProperties(currentIncludeLoc); + for (String key : p.stringPropertyNames()) { + includeKeys.add(key); + } + } + } + } + if (includeKeys!=null && !includeKeys.isEmpty()) { +// removing only when include keys provided + Set keys = properties.stringPropertyNames(); + for (String key : keys) { + if (!includeKeys.contains(key)) { + properties.remove(key); + } + } + } + } + + /** + * Checks whether file exists. + * @param location + * @return true when exists, false otherwise. + */ + protected boolean exists(String location) { + if (StringUtils.isBlank(location)) { + return false; + } + File file = new File(location); + if (file.exists()) { + return true; + } + ResourceLoader loader = new DefaultResourceLoader(); + Resource resource = loader.getResource(location); + return resource.exists(); + } + + /** + * Validates resource location. + * @param location + * @return true when valid, false otherwise + * @throws MojoExecutionException + */ + protected boolean validate(String location) throws MojoExecutionException { + boolean exists = exists(location); + if (exists) { + return true; + } + if (quiet) { + getLog().info("Ignoring non-existent properties file '" + location + "'"); + return false; + } else { + throw new MojoExecutionException("Non-existent properties file '" + location + "'"); + } + } + + /** + * Provides input stream. 
+ * @param location + * @return input stream + * @throws IOException + */ + protected InputStream getInputStream(String location) throws IOException { + File file = new File(location); + if (file.exists()) { + return new FileInputStream(location); + } + ResourceLoader loader = new DefaultResourceLoader(); + Resource resource = loader.getResource(location); + return resource.getInputStream(); + } + + /** + * Creates properties for given location. + * @param location + * @return properties for given location + * @throws MojoExecutionException + */ + protected Properties getProperties(String location) throws MojoExecutionException { + InputStream in = null; + try { + Properties properties = new Properties(); + in = getInputStream(location); + if (location.toLowerCase().endsWith(".xml")) { + properties.loadFromXML(in); + } else { + properties.load(in); + } + return properties; + } catch (IOException e) { + throw new MojoExecutionException("Error reading properties file " + location, e); + } finally { + IOUtils.closeQuietly(in); + } + } + + /** + * Provides escape characters. + * @param escapeChars + * @return escape characters + */ + protected List getEscapeChars(String escapeChars) { + List tokens = getListFromCSV(escapeChars); + List realTokens = new ArrayList(); + for (String token : tokens) { + String realToken = getRealToken(token); + realTokens.add(realToken); + } + return realTokens; + } + + /** + * Provides real token. + * @param token + * @return real token + */ + protected String getRealToken(String token) { + if (token.equalsIgnoreCase("CR")) { + return CR; + } else if (token.equalsIgnoreCase("LF")) { + return LF; + } else if (token.equalsIgnoreCase("TAB")) { + return TAB; + } else { + return token; + } + } + + /** + * Returns content. 
+ * @param comment + * @param properties + * @param escapeTokens + * @return content + */ + protected String getContent(String comment, Properties properties, List escapeTokens) { + List names = new ArrayList(properties.stringPropertyNames()); + Collections.sort(names); + StringBuilder sb = new StringBuilder(); + if (!StringUtils.isBlank(comment)) { + sb.append(comment); + } + for (String name : names) { + String value = properties.getProperty(name); + String escapedValue = escape(value, escapeTokens); + sb.append(name + "=" + escapedValue + "\n"); + } + return sb.toString(); + } + + /** + * Writes properties to given file. + * @param file + * @param comment + * @param properties + * @param escapeTokens + * @throws MojoExecutionException + */ + protected void writeProperties(File file, String comment, Properties properties, List escapeTokens) + throws MojoExecutionException { + try { + String content = getContent(comment, properties, escapeTokens); + FileUtils.writeStringToFile(file, content, ENCODING_UTF8); + } catch (IOException e) { + throw new MojoExecutionException("Error creating properties file", e); + } + } + + /** + * Escapes characters. + * @param s + * @param escapeChars + * @return + */ + protected String escape(String s, List escapeChars) { + String result = s; + for (String escapeChar : escapeChars) { + result = result.replace(escapeChar, getReplacementToken(escapeChar)); + } + return result; + } + + /** + * Provides replacement token. + * @param escapeChar + * @return replacement token + */ + protected String getReplacementToken(String escapeChar) { + if (escapeChar.equals(CR)) { + return "\\r"; + } else if (escapeChar.equals(LF)) { + return "\\n"; + } else if (escapeChar.equals(TAB)) { + return "\\t"; + } else { + return "\\" + escapeChar; + } + } + + /** + * Returns list from csv. 
+ * @param csv + * @return list of values generated from CSV + */ + protected static final List getListFromCSV(String csv) { + if (StringUtils.isBlank(csv)) { + return new ArrayList(); + } + List list = new ArrayList(); + String[] tokens = StringUtils.split(csv, ","); + for (String token : tokens) { + list.add(token.trim()); + } + return list; + } + + public void setIncludeSystemProperties(boolean includeSystemProperties) { + this.includeSystemProperties = includeSystemProperties; + } + + public void setEscapeChars(String escapeChars) { + this.escapeChars = escapeChars; + } + + public void setIncludeEnvironmentVariables(boolean includeEnvironmentVariables) { + this.includeEnvironmentVariables = includeEnvironmentVariables; + } + + public void setExclude(String exclude) { + this.exclude = exclude; + } + + public void setInclude(String include) { + this.include = include; + } + + public void setQuiet(boolean quiet) { + this.quiet = quiet; + } + + /** + * Sets property files for which keys properties should be included. 
+ * @param includePropertyKeysFromFiles + */ + public void setIncludePropertyKeysFromFiles( + String[] includePropertyKeysFromFiles) { + if (includePropertyKeysFromFiles!=null) { + this.includePropertyKeysFromFiles = Arrays.copyOf( + includePropertyKeysFromFiles, + includePropertyKeysFromFiles.length); + } + } + +} \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java new file mode 100644 index 000000000..8a763c1bd --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java @@ -0,0 +1,101 @@ +package eu.dnetlib.maven.plugin.properties; + +import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME; +import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.junit.Before; +import org.junit.Test; + +/** + * @author mhorst + * + */ +public class GenerateOoziePropertiesMojoTest { + + private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); + + @Before + public void clearSystemProperties() { + System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); + System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); + } + + @Test + public void testExecuteEmpty() throws Exception { + // execute + mojo.execute(); + + // assert + assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecuteSandboxNameAlreadySet() throws Exception { + // given + String workflowSourceDir = "eu/dnetlib/iis/wf/transformers"; + String sandboxName = "originalSandboxName"; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, 
workflowSourceDir); + System.setProperty(PROPERTY_NAME_SANDBOX_NAME, sandboxName); + + // execute + mojo.execute(); + + // assert + assertEquals(sandboxName, System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecuteEmptyWorkflowSourceDir() throws Exception { + // given + String workflowSourceDir = ""; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + + // execute + mojo.execute(); + + // assert + assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecuteNullSandboxNameGenerated() throws Exception { + // given + String workflowSourceDir = "eu/dnetlib/iis/"; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + + // execute + mojo.execute(); + + // assert + assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecute() throws Exception { + // given + String workflowSourceDir = "eu/dnetlib/iis/wf/transformers"; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + + // execute + mojo.execute(); + + // assert + assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecuteWithoutRoot() throws Exception { + // given + String workflowSourceDir = "wf/transformers"; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + + // execute + mojo.execute(); + + // assert + assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + +} diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java new file mode 100644 index 000000000..51d9575ff --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java 
@@ -0,0 +1,365 @@ +package eu.dnetlib.maven.plugin.properties; + +import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.doReturn; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Properties; + +import org.apache.maven.plugin.MojoExecutionException; +import org.apache.maven.project.MavenProject; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.runners.MockitoJUnitRunner; + + +/** + * @author mhorst + * + */ +@RunWith(MockitoJUnitRunner.class) +public class WritePredefinedProjectPropertiesTest { + + @Rule + public TemporaryFolder testFolder = new TemporaryFolder(); + + @Mock + private MavenProject mavenProject; + + private WritePredefinedProjectProperties mojo; + + @Before + public void init() { + mojo = new WritePredefinedProjectProperties(); + mojo.outputFile = getPropertiesFileLocation(); + mojo.project = mavenProject; + doReturn(new Properties()).when(mavenProject).getProperties(); + } + + // ----------------------------------- TESTS --------------------------------------------- + + @Test + public void testExecuteEmpty() throws Exception { + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(0, storedProperties.size()); + } + + @Test + public void testExecuteWithProjectProperties() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + Properties projectProperties = new Properties(); + 
projectProperties.setProperty(key, value); + doReturn(projectProperties).when(mavenProject).getProperties(); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(key)); + assertEquals(value, storedProperties.getProperty(key)); + } + + @Test(expected=MojoExecutionException.class) + public void testExecuteWithProjectPropertiesAndInvalidOutputFile() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + doReturn(projectProperties).when(mavenProject).getProperties(); + mojo.outputFile = testFolder.getRoot(); + + // execute + mojo.execute(); + } + + @Test + public void testExecuteWithProjectPropertiesExclusion() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String excludedKey = "excludedPropertyKey"; + String excludedValue = "excludedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(excludedKey, excludedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + mojo.setExclude(excludedKey); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(key)); + assertEquals(value, storedProperties.getProperty(key)); + } + + @Test + public void testExecuteWithProjectPropertiesInclusion() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties 
= new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + mojo.setInclude(includedKey); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(includedKey)); + assertEquals(includedValue, storedProperties.getProperty(includedKey)); + } + + @Test + public void testExecuteIncludingPropertyKeysFromFile() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + File includedPropertiesFile = new File(testFolder.getRoot(), "included.properties"); + Properties includedProperties = new Properties(); + includedProperties.setProperty(includedKey, "irrelevantValue"); + includedProperties.store(new FileWriter(includedPropertiesFile), null); + + mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(includedKey)); + assertEquals(includedValue, storedProperties.getProperty(includedKey)); + } + + @Test + public void testExecuteIncludingPropertyKeysFromClasspathResource() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = 
"includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + mojo.setIncludePropertyKeysFromFiles(new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"}); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(includedKey)); + assertEquals(includedValue, storedProperties.getProperty(includedKey)); + } + + @Test(expected=MojoExecutionException.class) + public void testExecuteIncludingPropertyKeysFromBlankLocation() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + mojo.setIncludePropertyKeysFromFiles(new String[] {""}); + + // execute + mojo.execute(); + } + + @Test + public void testExecuteIncludingPropertyKeysFromXmlFile() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml"); + Properties includedProperties = new Properties(); + 
includedProperties.setProperty(includedKey, "irrelevantValue"); + includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null); + + mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(includedKey)); + assertEquals(includedValue, storedProperties.getProperty(includedKey)); + } + + @Test(expected=MojoExecutionException.class) + public void testExecuteIncludingPropertyKeysFromInvalidXmlFile() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml"); + Properties includedProperties = new Properties(); + includedProperties.setProperty(includedKey, "irrelevantValue"); + includedProperties.store(new FileOutputStream(includedPropertiesFile), null); + + mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); + + // execute + mojo.execute(); + } + + @Test + public void testExecuteWithQuietModeOn() throws Exception { + // given + mojo.setQuiet(true); + mojo.setIncludePropertyKeysFromFiles(new String[] {"invalid location"}); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(0, storedProperties.size()); + } + + @Test(expected=MojoExecutionException.class) + public void 
testExecuteIncludingPropertyKeysFromInvalidFile() throws Exception { + // given + mojo.setIncludePropertyKeysFromFiles(new String[] {"invalid location"}); + + // execute + mojo.execute(); + } + + @Test + public void testExecuteWithEnvironmentProperties() throws Exception { + // given + mojo.setIncludeEnvironmentVariables(true); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertTrue(storedProperties.size() > 0); + for (Object currentKey : storedProperties.keySet()) { + assertTrue(((String)currentKey).startsWith(PROPERTY_PREFIX_ENV)); + } + } + + @Test + public void testExecuteWithSystemProperties() throws Exception { + // given + String key = "systemPropertyKey"; + String value = "systemPropertyValue"; + System.setProperty(key, value); + mojo.setIncludeSystemProperties(true); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertTrue(storedProperties.size() > 0); + assertTrue(storedProperties.containsKey(key)); + assertEquals(value, storedProperties.getProperty(key)); + } + + @Test + public void testExecuteWithSystemPropertiesAndEscapeChars() throws Exception { + // given + String key = "systemPropertyKey "; + String value = "systemPropertyValue"; + System.setProperty(key, value); + mojo.setIncludeSystemProperties(true); + String escapeChars = "cr,lf,tab,|"; + mojo.setEscapeChars(escapeChars); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertTrue(storedProperties.size() > 0); + assertFalse(storedProperties.containsKey(key)); + assertTrue(storedProperties.containsKey(key.trim())); + assertEquals(value, storedProperties.getProperty(key.trim())); + } + + // ----------------------------------- PRIVATE ------------------------------------------- + + private File 
getPropertiesFileLocation() { + return new File(testFolder.getRoot(), "test.properties"); + } + + private Properties getStoredProperties() throws FileNotFoundException, IOException { + Properties properties = new Properties(); + properties.load(new FileInputStream(getPropertiesFileLocation())); + return properties; + } +} diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties b/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties new file mode 100644 index 000000000..3c79fe6cb --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties @@ -0,0 +1 @@ +includedPropertyKey=irrelevantValue \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml b/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml new file mode 100644 index 000000000..03188dc53 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml @@ -0,0 +1,281 @@ + + + dhp-build-properties-maven-plugin + + eu.dnetlib.dhp + dhp-build-properties-maven-plugin + 1.0.0-SNAPSHOT + dhp-build-properties + false + true + + + generate-properties + Generates oozie properties which were not provided from commandline. + false + true + false + false + false + true + eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo + java + per-lookup + once-per-session + false + + + + write-project-properties + Writes project properties for the keys listed in specified properties files. 
+Based on: +http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html + false + true + false + false + false + true + eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties + java + per-lookup + once-per-session + false + + + properties.escapeChars + java.lang.String + false + true + Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed, +tab=tab. Any other values are taken literally. + + + properties.exclude + java.lang.String + false + true + Comma separated set of properties to exclude when writing the properties file + + + properties.include + java.lang.String + false + true + Comma separated set of properties to write to the properties file. If provided, only the properties matching +those supplied here will be written to the properties file. + + + properties.includeEnvironmentVariables + boolean + false + true + If true, the plugin will include environment variables when writing the properties file. Environment variables +are prefixed with "env". Environment variables override project properties. + + + properties.includePropertyKeysFromFiles + java.lang.String[] + false + true + + + + properties.includeSystemProperties + boolean + false + true + If true, the plugin will include system properties when writing the properties file. System properties override +both environment variables and project properties. 
+ + + properties.outputFile + java.io.File + true + true + The file that properties will be written to + + + project + org.apache.maven.project.MavenProject + true + false + + + + properties.quiet + boolean + false + true + If true, the plugin will silently ignore any non-existent properties files, and the build will continue + + + + + + + + + + + + + + + org.apache.maven + maven-plugin-api + jar + 2.0 + + + org.apache.maven + maven-project + jar + 2.0 + + + org.apache.maven + maven-profile + jar + 2.0 + + + org.apache.maven + maven-model + jar + 2.0 + + + org.apache.maven + maven-artifact-manager + jar + 2.0 + + + org.apache.maven + maven-repository-metadata + jar + 2.0 + + + org.apache.maven.wagon + wagon-provider-api + jar + 1.0-alpha-5 + + + org.codehaus.plexus + plexus-utils + jar + 1.0.4 + + + org.apache.maven + maven-artifact + jar + 2.0 + + + org.codehaus.plexus + plexus-container-default + jar + 1.0-alpha-8 + + + classworlds + classworlds + jar + 1.1-alpha-2 + + + org.kuali.maven.plugins + properties-maven-plugin + jar + 1.3.2 + + + org.springframework + spring-core + jar + 3.1.1.RELEASE + + + org.springframework + spring-asm + jar + 3.1.1.RELEASE + + + org.jasypt + jasypt + jar + 1.9.0 + + + org.kuali.maven.common + maven-kuali-common + jar + 1.2.8 + + + org.apache.ant + ant + jar + 1.8.2 + + + org.apache.ant + ant-launcher + jar + 1.8.2 + + + org.codehaus.plexus + plexus-interpolation + jar + 1.15 + + + commons-lang + commons-lang + jar + 2.6 + + + commons-io + commons-io + jar + 2.5 + + + org.slf4j + jcl-over-slf4j + jar + 1.6.4 + + + org.slf4j + slf4j-api + jar + 1.7.22 + + + org.slf4j + slf4j-log4j12 + jar + 1.7.22 + + + log4j + log4j + jar + 1.2.17 + + + javax.servlet + javax.servlet-api + jar + 3.1.0 + + + \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class 
b/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class new file mode 100644 index 0000000000000000000000000000000000000000..3eeb323f7bd9cd1388a59e092c64c2324ded4d6f GIT binary patch literal 2814 zcmbtW>r)d~6#rfF*krRPMuNsi4MH_MQ_)gIr9uEp4FL(DXj{5T7FbAj<7UIdw)SOP zTl>&HreizfOzj81TBb$RDIfcxANu!nI!@`in*fQRUpg?m=iYnH@0{~H=bp`<|2|v> za0K6p*pD6or(Jl(gWN~ZbU@%Au6EXg#i&~G3Y{!zlTIR9Olx9n>5Y|I4@!w zdR!Rg?+g5KQN(L_UBDY6cA&?NF}x{4MqES!NfBjGJQ&9We@yZNm8(qg<+Okl!|s8> z{(2Yxt_6D|L|bAd#tN>kii?7l4s;lO4cSqG1E}BiQ^3AVO`6bvSyCR zshq;Fs}v&E5$)>iKX2hNxKe6bH5DVvu>Im@XwDO=rkd>xwqV@|gFUP#33F>i)s$#1 z9aoGYIi8{&Z$wYXsS(*w`MR)aHz!p>J{3`Np`@mmDK#ES%QK1=%A|4=D!mLnqZp>D zWJ9MFO)+Fs>DMo-%D~!wpFX8iQi3w4ByuJ>s+=e&CS$UejO%kzIZfN;F;h-V_sJQ{ zHC2&8?4C;~8B--X1f&IMl;f&#iS2>l#?0$BZc!8kwiq0yKCKX-XLdZM=Zu8Xr5X$s z+4YbHWW%J~EAluwQ%!{;YSyG&oR%$}Rh4o!pIPOt_6egC8-r;-P7NtQ7Z9XO^D*?O zyb^(DFxkxeMM53wDY7Sfhia_yatVb}F_bAe+DuMnBpgIg0KG4P@@)Tq^`tt?>c;eV zN}p}cTI@9^RYO2lf(i2IDWXxr430{8i9cpBCt)6!8FoAwduL8fB^3&-A*b;M448Ug zwbp?GTtK5v35*+hI*^gg$+m!mxA3-vcW_0(yAs~R`xKdlY?Z6SFywh&GXWn+_z+hG zd?evxd?MhQgirCAgzLyr4J3SyFC=`4uO!^SO$lG4R>C*?eGZ{roGBj=Cp%^dkw9rmu=rZ;E z7G=FkNa?)IcTA~Ax-e3VKSiDZ|*w&htRBhvuXjqs#Qjm*LfHAPiU z=;<^~vf^PsOhbk4t0S99sg!$glhnd>n@;upPXo4kzJMzj$_#xOXrxpHi&0iHfiaIdjLPUIvNvMShHq_`Fn zs_CP=pFZd|_)&veFg#DIGKjQ3P_&ld)pPH%2izw7vWn( z6%WV;9?${e0N4W$_M!^U5f1^%wVHn9)FMoLoTc{=<<5hxqJU~}i2gWN6j~wd7Lq+E zdyFFj4*!FFWDGb-`9-wSlLj)W6JczDoJw@|^CRq_;CA*kEWp2nUD2i`>~31Vjq6Qq zcE5eO)nRLO`kX$;9q4|$&)L!@)c*>h&E@8FQ%DYP15-u9r%fOoK&EByc(kJaXx{#jOiJ64FT`uYx`f?Zm&05cBPlKRZQdocA%fS2&D- y4kBMbr+_g1Gpf%DcEht_-_o+qAES{Lr>vUcZ#cXw*aCa80jN0`y2wo#y8i>aD9Qi; literal 0 HcmV?d00001 diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class b/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class new file 
mode 100644 index 0000000000000000000000000000000000000000..e09929deab09a79b8e1d1a5272a1fc8a61268d99 GIT binary patch literal 10384 zcmb_i3w%`Nl|N@PlY29B17skTbIaYAs0r=eu_%can+t z+h2d0`@PP0zVklk`-V5)d-XICE#()z6sNNq{l-f!I$K7y^p2N)OYh3?cQQQZ#pk^A z-t*E0dKWz=`7jpPrE!UuCeT@(-DugPajBR5bXEqM&bqW^0`3)XpL`6R%VlPQk0qweUiLTOn{Oy)>U!>AYInE9JyB@@e++T5i#JozAVYu}$Nvywoi+ zspa)LB?P=(=c{!}>6F&_8mw}Mmpi#j_HPilZk?}{wnyh)FZc0Aoj1vNvuxfXpMIUM z(|N1T+jPEO=Np8(?XqyAm&3e6qkS4jm|QJAO#Y7D;l1HdEF2#U^=4Ah_}~hr@|HwA zoe9S?o5HcI3CNB%rjowqwM>({dp2~p_Vo2{@9t@BYu~cHwQCbLw05;@Sl8Zl)%J~j zZOfUaN8^!LcEIdTC6Z<;bKn~DK)NlJ7;1~g%ruZ}Eq1DwOeJ=kkqp!PjznrO6i$XC zyUfr~c&`}`*=wOr83C;Tf>LiHn@MIff|qH6g*BQ8$r!7io3c?8`1m>|pP7z?lV;1V za0(PmwF&G!kj|JxHknbOt=e8`jqi=767eBZq>ze+cPPp=bH8GYNweXYN>@hX(adT{ zXh9uh;ciI`fS8FL(YV=_9ok{0`ed_+IuQxSa>(pScV<@2=_sVu9FHe5VFkdnw8I?8hO$Egp-4Ie5e)3e4yHrl z5s*-CHknMB>2%xL&EZrW#+()_*lSs_zpH!u#;(mh?R|Z%UEAB*TRYal6gO^e8#_C7 z(99G!fJ+wCIk+v5r3Z3M9l&|5R%^7!<7TQQ7EY(duqqwSBTh0vi;}EQZ*olradQGto zEin|$wDbaj*Y*rG&}CZOA0BAWTcbIJ7>Y2udWFV25#tftH6DbYnfqagB1xM=r-9YM z#q}5*Ol$>Y?o<(>G?oYtm?@?P6;aYj32-}8;URNhBDE(JNuQ?PAK5O2u47!bRnkd0$;gg`K`3yDeNs6~6LGg{VOjgQG`Dxd}Xkm541ZKjSQyBxMT zq6epgDvs?)P>9=(qXTy8bjM+A5Qj8i+?C$J6tK`c_N~oEW3W>wp@iwi|1UvBrBsy2 ziTkWnqO!Bf5bgG%x#T&9L?`#}Cldq!Es~S%^?w+w#Tmm%rB>2nTK4`j8(&>s3xkve z2zt;^DO^7rgI!#Gag@dB8Y?=a8D*)-SU6%zy0x7|f`pJJrICu3C6V-A1g{w8n5LyA zgs5_3Y?e&LgFZ${gKn44v-EjXeWPn#+wb6)b7~BZa{>|_Ic*&xxy&LJve)37IAzdB z1lga`n+9wJIx1^HSj!-()EIO#eOTkH!Fze1L4Qv_Hh4dS#G4KNFyDdr_;!Quc2+EMX+xed=KAZ@V$JW!S@T%pWyp6{-nVV&~b!ch18B1qO4yU^a|g`beWXz z<0ad5=FTu2tT5+l{Gh>y_){7`1RQeuEp#g`0_}tGgyig+cp~1Y@;|(%CYdj7Yb4Lt z%!h*Tw{YyO!K$G<6T1_wsxU@zkk5y3imr=bwG77Q(2=7JI?WH`L$4eB2qSX17w#X5 z8T@Hb&X4k8gOBiGjXz`XXZdhW%Z1q(r?;k3iBt^?Xh1j|%@hR6kLC6%>u*k_!UrT? 
z81!BGYl9ybC2%?Z$^`fcjX!7bll+vyPh**WO8Jz7GnsnVriue#YQu8LnU- z9?6;SsM8JlXF1_HK5Fn6_&zwuHgM_MhK%mo2D|5N_-ioui~J?{7N<1+vcb>u3rw}+ zDWQ2sI+F@VGNKF=WaEL!a$7hWLv7&Dlz+re+B0S-j>T}Dv-5*e4V-s{L^-FQsjDS6gp)}# zj>}DBk%TffJLgJ)QScjfs2lJ5!RwiXb%RI9%-vEPH3rJRQpq<7l+yFDK9*n0Sz?Jn z+&c$D0K}F#9{>!-6^-wNQmdUDN=ofDZ_1kSh^b^Frz0fcUXi0NFS-KNL6%55@blCG zrNu&UOo1#NH(}cfstT~>tOGFGu5h}`+z&gz#T?fIcU%n$Wn;_Lk_~PqKn*hO;?0M2 zm&o@Zd=3d1)S~Iu zp=3sQkP0jum5aPnfpXc$RonU6vk^}MfWp?bu$4WyYly>|OwueodB^caK7poDf|A5^ z6GkrDh;hm>uEaR)7;mBJlyQvbVm<2^ZwC&PU%=&->Q=QVDiv;&L#jpnsajOHszuGK zT2#8KMX`&v3N)d1#`jK)LTDMSCwPMRWd(K+-%>TXgziG$umI?8x(6esd$GAhHghd` zKn~FZuaT?&1eJ8u2VWugFnNZl6g}+-dFxM-ZjH*Ukyl0n=fj^dOy#f91niyYpEOJr zuaO`9$-^|oUy0!`RdogdHg%XT=?V^0AO|}QibQcL+bwhpQFkLEaxJlSp z4B`po>KIK1Gtt&?mt8AzDcf(N=nx2I$j}$)og1I;>a@T5Q=Y>zKQb?g#5z zX(@Ilp^E{k2lN9F#Dh3NL}bkz!px^IQ%0_HG)tq0%7I?w>7q6C2#z7D0Pv&uJG>QF zemat-&tN~UCYTb9ya5a)XgAwJz2+p%I7Ks=PEieBB6GgNAx)e=Sixgr>KXXhM7L21W*Nf>+s*> zSaShl1J>^cbdFhT?P;tDvlGa5o~ppCM$f!UtAQU^Dy)GWR|nd1!2G)5Bn6w?0rx4Y zM>_BXJT@Z%=_yX;HJF4v>WiDof`PJOT6lz-R43Hr4ZzeEDWNs_0zUh+>C&@LGXkD~af&W;5f1$69XcPq z@KV5YhSm#3iy3-ZT@Ms;poK;YXf~R;(zDU%K%<|cB`$36)(2qj!oUwltgwfsKmaeo z2wtMObc`D4Wmw-=>2f+w>u8v6q!SdSlZX_jV2`gt=&wQS!}J`z4v04}`Ud@ozKOE? 
zTl7mhL%*UwQA)ZQb~BeWz$JnEA@Zu80X!i(3Uzsbc@2F5t7X_fS*_~8b~$~KzJwi6 zif!T2vn*VCUse__z30&@0p!2a3$S%JcK#9gcmReYhERhw3)@S8v7ip+p$-8`jN=+A zJ5LieI;M(9Qs?R?JXttrPf(_(Jh6lB+&c8 z*$ISX7oaPk&QrkK0GzKvNbX!vC_6_L8l>o!do)1dA^>y)lxR~B=C*RxbVV;rebo_~ z=Zt!*;nIMp@Ly^9wu*bg>ze*t1ZW-wa)Q>ro@a0dSR;BHn)(@Ry@(237c zK$MWPrcNjTFB%s0JOv{;yPrGl5Zon-Y3s!2D*yUnYCl50Mi|!BM@g6Vn#LwKqFYD6 zeXPr=j4oRlNKuU^Xv6DG9tGuj9pZ7pG?q}OYP)gR5`WOzfggb-v%&Zwaxa({KQ6&&I-ok(QM|5(f|h{LKZE0yA`@58pQ9%x*PQ3IS>EH|mL6pJ?;vJL8d^$!ki<3@PVLaO8tUBZiy*e`>Mt*T4N1LizC^AwA$6y~84!{-Aj zc)bA_NZ}ixX)iV$6dk}O7oDJfQNT&M&W;UD9*K@fQ2~zxkcSXzTHy*G{+b)k?eDpxJI}L@kzty5AZ_tw{b}%oP zgQ_Bv8up;FeG%J1yEZGQ0|?%dxvFUoLik(I_=8xtgey>@aSKW^mw3YzDmhPeO7}lH zN4m;dowoY_4lsDV#U4@rw=t4HQ5$S%44~}20U{1~{M%2`jYp}X3G!Og?+*t&;`=Yd zWC$su5X*^-2os!KWAe{{uMo&{RZg@n^B|+C1;I zD%1()TDvZlo)rP4Cl#tYU$j{>IX)4?H^RBC`qav)mW}@eTkt+;85Xja#pL4pv2H!n z>DG+_%d>Y3Q>5$Yg&#XyT2-376X9$SHS8{GNg_(L?j^D&4xFz;y{dqK&(5`#_H%H67fah`Ej&ca>-`&tX+V6| zgrDNNq53<~-(vR%k*2J>$u1bTIE`)ER*<&5I1Mk(z>Bj`;9hv~K6vqdc=64OJIm0P z=XqML_$=_^>GH;I3;QQ%#k$uARUzZE11jiHfwd#(kDjF6oef=$rtgla08cmeRGx!P{XuI=8z z)?5^2`Zp9DRuo+gRN}Oy!Rq5wHgX5WziL8Ydk~4>kVBTGd0CdqPdUKB^dBgTN8_qL zPE$wYdKkDK83&h@ESUbYI9=LkT!(?{$T+y<;w^OjS8=+0qj5b3T#t`~OD^w%>sQ5b z8KZGM30zN&gG(L&1lO;N^SEj>u4jSk^W)%>M+d?6-}FEDN;upMzZ5(9`nV151R zlN9?p4IQUO!E5=<1hm92kegqm3jT@%w?D5}zk2AfjH!fB;kf93fxt!ohs!D + + 4.0.0 + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + dhp-build + pom + + dhp-build-assembly-resources + dhp-build-properties-maven-plugin + + + diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml new file mode 100644 index 000000000..42b8864fa --- /dev/null +++ b/dhp-common/pom.xml @@ -0,0 +1,177 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + + dhp-common + jar + + + + + ${project.groupId} + dhp-schemas + ${project.version} + + + + org.apache.oozie + oozie-core + provided + + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + + org.apache.hadoop + hadoop-common + + + + org.apache.spark + spark-core_2.10 + + + + 
org.apache.spark + spark-sql_2.10 + + + + org.apache.avro + avro + + + + org.apache.avro + avro-mapred + hadoop2 + + + + org.apache.commons + commons-lang3 + + + + + org.springframework + spring-beans + + + + com.beust + jcommander + + + + org.apache.pig + pig + + + + com.linkedin.datafu + datafu + + + + commons-beanutils + commons-beanutils + + + + commons-io + commons-io + + + + org.jdom + jdom + + + + + + + + net.alchim31.maven + scala-maven-plugin + + + + + org.apache.avro + avro-maven-plugin + + + generate-test-sources + + schema + idl-protocol + + + String + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-test-sources + generate-test-sources + + add-test-source + + + + ${project.build.directory}/generated-test-sources/avro/ + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + eu.dnetlib.iis.common.IntegrationTest + + + + + org.apache.maven.plugins + maven-failsafe-plugin + + + + + + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java new file mode 100644 index 000000000..7fbcd8fef --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java @@ -0,0 +1,106 @@ +package eu.dnetlib.dhp.common; + +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.LinkedList; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsShell; +import org.springframework.beans.BeanUtils; +import org.springframework.util.ClassUtils; +import org.springframework.util.ReflectionUtils; + +/** + * Extracted from: + * https://github.com/spring-projects/spring-hadoop/blob/master/spring-hadoop-core/src/main/java/org/springframework/data/hadoop/fs/FsShellPermissions.java + * + * Utility class for accessing Hadoop 
FsShellPermissions (which is not public) + * without having to duplicate its code. + * @author Costin Leau + * + */ +public class FsShellPermissions { + + private static boolean IS_HADOOP_20X = ClassUtils.isPresent("org.apache.hadoop.fs.FsShellPermissions$Chmod", + FsShellPermissions.class.getClassLoader()); + + public enum Op { + CHOWN("-chown"), CHMOD("-chmod"), CHGRP("-chgrp"); + + private final String cmd; + + Op(String cmd) { + this.cmd = cmd; + } + + public String getCmd() { + return cmd; + } + } + + // TODO: move this into Spring Core (but add JDK 1.5 compatibility first) + @SafeVarargs + static T[] concatAll(T[] first, T[]... rest) { + // can add some sanity checks + int totalLength = first.length; + for (T[] array : rest) { + totalLength += array.length; + } + T[] result = Arrays.copyOf(first, totalLength); + int offset = first.length; + for (T[] array : rest) { + System.arraycopy(array, 0, result, offset, array.length); + offset += array.length; + } + return result; + } + + public static void changePermissions(FileSystem fs, Configuration config, + Op op, boolean recursive, String group, String uri) { + changePermissions(fs, config, op, recursive, group, new String[] {uri}); + } + + public static void changePermissions(FileSystem fs, Configuration config, + Op op, boolean recursive, String group, String... 
uris) { + String[] argvs; + if (recursive) { + argvs = new String[1]; + argvs[0] = "-R"; + } else { + argvs = new String[0]; + } + argvs = concatAll(argvs, new String[] { group }, uris); + + // Hadoop 1.0.x + if (!IS_HADOOP_20X) { + Class cls = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions", config.getClass().getClassLoader()); + Object[] args = new Object[] { fs, op.getCmd(), argvs, 0, new FsShell(config) }; + + Method m = ReflectionUtils.findMethod(cls, "changePermissions", FileSystem.class, String.class, String[].class, int.class, FsShell.class); + ReflectionUtils.makeAccessible(m); + ReflectionUtils.invokeMethod(m, null, args); + } + // Hadoop 2.x + else { + Class cmd = ClassUtils.resolveClassName("org.apache.hadoop.fs.shell.Command", config.getClass().getClassLoader()); + Class targetClz = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions$Chmod", config.getClass().getClassLoader()); + Configurable target = (Configurable) BeanUtils.instantiate(targetClz); + target.setConf(config); + // run(String...) 
swallows the exceptions - re-implement it here + // + LinkedList args = new LinkedList(Arrays.asList(argvs)); + try { + Method m = ReflectionUtils.findMethod(cmd, "processOptions", LinkedList.class); + ReflectionUtils.makeAccessible(m); + ReflectionUtils.invokeMethod(m, target, args); + m = ReflectionUtils.findMethod(cmd, "processRawArguments", LinkedList.class); + ReflectionUtils.makeAccessible(m); + ReflectionUtils.invokeMethod(m, target, args); + } catch (IllegalStateException ex){ + throw new RuntimeException("Cannot change permissions/ownership " + ex); + } + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java new file mode 100644 index 000000000..1ce7cd426 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java @@ -0,0 +1,75 @@ +package eu.dnetlib.dhp.common; + +import java.io.UnsupportedEncodingException; + +/** + * InfoSpaceConstants constants. 
+ * + * @author mhorst + * + */ +public final class InfoSpaceConstants { + + public static final float CONFIDENCE_TO_TRUST_LEVEL_FACTOR = 0.9f; + + public static final String ENCODING_UTF8 = "utf-8"; + + public static final char ROW_PREFIX_SEPARATOR = '|'; + + public static final String ID_NAMESPACE_SEPARATOR = "::"; + public static final String CLASSIFICATION_HIERARCHY_SEPARATOR = ID_NAMESPACE_SEPARATOR; + public static final String INFERENCE_PROVENANCE_SEPARATOR = ID_NAMESPACE_SEPARATOR; + + public static final String ROW_PREFIX_RESULT = "50|"; + public static final String ROW_PREFIX_PROJECT = "40|"; + public static final String ROW_PREFIX_PERSON = "30|"; + public static final String ROW_PREFIX_ORGANIZATION = "20|"; + public static final String ROW_PREFIX_DATASOURCE = "10|"; + + public static final String QUALIFIER_BODY_STRING = "body"; + public static final byte[] QUALIFIER_BODY; + + public static final String SEMANTIC_CLASS_MAIN_TITLE = "main title"; + public static final String SEMANTIC_CLASS_PUBLICATION = "publication"; + public static final String SEMANTIC_CLASS_UNKNOWN = "UNKNOWN"; + + public static final String SEMANTIC_SCHEME_DNET_PERSON_ROLES = "dnet:personroles"; + public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT = "dnet:result_result_relations"; + public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_PROJECT = "dnet:result_project_relations"; + + public static final String SEMANTIC_SCHEME_DNET_TITLE = "dnet:dataCite_title"; + public static final String SEMANTIC_SCHEME_DNET_TITLE_TYPOLOGIES = "dnet:title_typologies"; + public static final String SEMANTIC_SCHEME_DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; + public static final String SEMANTIC_SCHEME_DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions"; + public static final String SEMANTIC_SCHEME_DNET_LANGUAGES = "dnet:languages"; + public static final String SEMANTIC_SCHEME_DNET_PID_TYPES = "dnet:pid_types"; + public static final String 
SEMANTIC_SCHEME_DNET_CLASSIFICATION_TAXONOMIES = "dnet:subject_classification_typologies"; + + // resultResult citation and similarity related + public static final String SEMANTIC_SCHEME_DNET_DATASET_PUBLICATION_RELS = "dnet:dataset_publication_rels"; + + public static final String SEMANTIC_CLASS_TAXONOMIES_ARXIV = "arxiv"; + public static final String SEMANTIC_CLASS_TAXONOMIES_WOS = "wos"; + public static final String SEMANTIC_CLASS_TAXONOMIES_DDC = "ddc"; + public static final String SEMANTIC_CLASS_TAXONOMIES_MESHEUROPMC = "mesheuropmc"; + public static final String SEMANTIC_CLASS_TAXONOMIES_ACM = "acm"; + + public static final String EXTERNAL_ID_TYPE_INSTANCE_URL = "dnet:instance-url"; + public static final String EXTERNAL_ID_TYPE_UNKNOWN = "unknown"; + + // publication types class ids + public static final String SEMANTIC_CLASS_INSTANCE_TYPE_ARTICLE = "0001"; + public static final String SEMANTIC_CLASS_INSTANCE_TYPE_DATASET = "0021"; + + static { + try { + QUALIFIER_BODY = QUALIFIER_BODY_STRING.getBytes(ENCODING_UTF8); + + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + + private InfoSpaceConstants() { + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java new file mode 100644 index 000000000..e71d69027 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java @@ -0,0 +1,74 @@ +package eu.dnetlib.dhp.common; + +import java.util.Map; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; + +/** + * Utility class holding parameter names and method simplifying access to parameters from hadoop context. 
+ * @author mhorst + * + */ +public final class WorkflowRuntimeParameters { + + public static final String OOZIE_ACTION_OUTPUT_FILENAME = "oozie.action.output.properties"; + + public static final char DEFAULT_CSV_DELIMITER = ','; + + public static final String UNDEFINED_NONEMPTY_VALUE = "$UNDEFINED$"; + + // default values + public static final String DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE = "60000"; + public static final String DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000"; + // parameter names + public static final String DNET_SERVICE_CLIENT_READ_TIMEOUT = "dnet.service.client.read.timeout"; + public static final String DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT = "dnet.service.client.connection.timeout"; + + // ----------------- CONSTRUCTORS ----------------------------- + + private WorkflowRuntimeParameters() {} + + /** + * Retrieves parameter from hadoop context configuration when set to value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. + */ + public static String getParamValue(String paramName, Configuration configuration) { + String paramValue = configuration.get(paramName); + if (StringUtils.isNotBlank(paramValue) && !UNDEFINED_NONEMPTY_VALUE.equals(paramValue)) { + return paramValue; + } else { + return null; + } + } + + /** + * Retrieves {@link Integer} parameter from hadoop context configuration when set to non-empty value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. + * Null is returned when parameter was not set. 
+ * @throws {@link NumberFormatException} if parameter value does not contain a parsable integer + */ + public static Integer getIntegerParamValue(String paramName, Configuration configuration) throws NumberFormatException { + String paramValue = getParamValue(paramName, configuration); + return paramValue!=null?Integer.valueOf(paramValue):null; + } + + /** + * Retrieves parameter from hadoop context configuration when set to value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. + * If requested parameter was not set, fallback parameter is retrieved using the same logic. + */ + public static String getParamValue(String paramName, String fallbackParamName, Configuration configuration) { + String resultCandidate = getParamValue(paramName, configuration); + return resultCandidate!=null?resultCandidate:getParamValue(fallbackParamName, configuration); + } + + /** + * Provides parameter value. Returns default value when entry not found among parameters. + * + * @param paramName parameter name + * @param defaultValue parameter default value to be returned when entry not found among parameters + * @param parameters map of parameters + */ + public static String getParamValue(String paramName, String defaultValue, Map parameters) { + return parameters.containsKey(paramName)?parameters.get(paramName):defaultValue; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java new file mode 100644 index 000000000..5b37d31f1 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.common.counter; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Map; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; + +/** + * Class that groups several counters which are identified by name (String value). 
+ * + * @author madryk + */ +public class NamedCounters implements Serializable { + + private static final long serialVersionUID = 1L; + + + private final Map counters; + + + //------------------------ CONSTRUCTORS -------------------------- + + /** + * Creates {@link NamedCounters} with empty initial counters. + */ + public NamedCounters() { + this.counters = Maps.newHashMap(); + } + + /** + * Creates {@link NamedCounters} with initial counters.
+ * Starting value of initial counters is zero. + * + * @param initialCounterNames - names of initial counters + */ + public NamedCounters(String[] initialCounterNames) { + Preconditions.checkNotNull(initialCounterNames); + + this.counters = Maps.newHashMap(); + + for (String initialCounterName : initialCounterNames) { + this.counters.put(initialCounterName, 0L); + } + } + + /** + * Creates {@link NamedCounters} with initial counters.
+ * Starting value of initial counters is zero. + * + * @param initialCounterNamesEnumClass - enum class providing names of initial counters + */ + public > NamedCounters(Class initialCounterNamesEnumClass) { + Preconditions.checkNotNull(initialCounterNamesEnumClass); + + this.counters = Maps.newHashMap(); + Enum[] enumConstants = initialCounterNamesEnumClass.getEnumConstants(); + + for (int i=0; i + * Internally uses {@link #increment(String, Long)} + */ + public void increment(String counterName) { + increment(counterName, 1L); + } + + /** + * Increments value of a counter with the name specified as parameter by the given value.
+ * If current instance of {@link NamedCounters} does not contain counter + * with provided name, then before incrementing counter will be created with starting + * value equal to zero. + */ + public void increment(String counterName, Long incrementValue) { + + long oldValue = counters.getOrDefault(counterName, 0L); + counters.put(counterName, oldValue + incrementValue); + } + + /** + * Returns current value of a counter with the name specified as parameter. + * + * @throws IllegalArgumentException when {@link NamedCounters} does not contain counter + * with provided name + */ + public long currentValue(String counterName) { + + if (!counters.containsKey(counterName)) { + throw new IllegalArgumentException("Couldn't find counter with name: " + counterName); + } + + return counters.get(counterName); + } + + /** + * Returns names of currently tracked counters. + */ + public Collection counterNames() { + return counters.keySet(); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java new file mode 100644 index 000000000..6686432dd --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java @@ -0,0 +1,48 @@ +package eu.dnetlib.dhp.common.counter; + +import org.apache.spark.AccumulableParam; + +import scala.Tuple2; + +/** + * Spark {@link AccumulableParam} for tracking multiple counter values using {@link NamedCounters}. + * + * @author madryk + */ +public class NamedCountersAccumulableParam implements AccumulableParam> { + + private static final long serialVersionUID = 1L; + + + //------------------------ LOGIC -------------------------- + + /** + * Increments {@link NamedCounters} counter with the name same as the first element of passed incrementValue tuple + * by value defined in the second element of incrementValue tuple. 
+ */ + @Override + public NamedCounters addAccumulator(NamedCounters counters, Tuple2 incrementValue) { + counters.increment(incrementValue._1, incrementValue._2); + return counters; + } + + /** + * Merges two passed {@link NamedCounters}. + */ + @Override + public NamedCounters addInPlace(NamedCounters counters1, NamedCounters counters2) { + for (String counterName2 : counters2.counterNames()) { + counters1.increment(counterName2, counters2.currentValue(counterName2)); + } + return counters1; + } + + /** + * Returns passed initialCounters value without any modifications. + */ + @Override + public NamedCounters zero(NamedCounters initialCounters) { + return initialCounters; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java new file mode 100644 index 000000000..bebb82b6e --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java @@ -0,0 +1,52 @@ +package eu.dnetlib.dhp.common.counter; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Properties; + +/** + * Writer of {@link NamedCounters} object into a properties file. + * + * @author madryk + */ +public class NamedCountersFileWriter { + + + //------------------------ LOGIC -------------------------- + + /** + * Writes {@link NamedCounters} as a properties file located under + * provided filePath. 
+ * + * @throws IOException if writing to properties file resulted in an error + */ + public void writeCounters(NamedCounters counters, String filePath) throws IOException { + + Properties counterProperties = buildPropertiesFromCounters(counters); + + File file = new File(filePath); + try (OutputStream os = new FileOutputStream(file)) { + + counterProperties.store(os, null); + + } + + } + + + //------------------------ PRIVATE -------------------------- + + private Properties buildPropertiesFromCounters(NamedCounters counters) { + + Properties properties = new Properties(); + + for (String counterName : counters.counterNames()) { + long count = counters.currentValue(counterName); + properties.put(counterName, String.valueOf(count)); + } + + return properties; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java new file mode 100644 index 000000000..bcc6494b2 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java @@ -0,0 +1,67 @@ +package eu.dnetlib.dhp.common.fault; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import eu.dnetlib.dhp.audit.schemas.Cause; +import eu.dnetlib.dhp.audit.schemas.Fault; + +/** + * {@link Fault} related utilities. + * @author mhorst + * + */ +public final class FaultUtils { + + // ---------------------- CONSTRUCTORS ------------------- + + private FaultUtils() {} + + // ---------------------- LOGIC -------------------------- + + /** + * Generates {@link Fault} instance based on {@link Throwable}. 
+ * @param entityId entity identifier + * @param throwable + * @param auditSupplementaryData + * @return {@link Fault} instance generated for {@link Throwable} + */ + public static Fault exceptionToFault(CharSequence entityId, Throwable throwable, + Map auditSupplementaryData) { + Fault.Builder faultBuilder = Fault.newBuilder(); + faultBuilder.setInputObjectId(entityId); + faultBuilder.setTimestamp(System.currentTimeMillis()); + faultBuilder.setCode(throwable.getClass().getName()); + faultBuilder.setMessage(throwable.getMessage()); + StringWriter strWriter = new StringWriter(); + PrintWriter pw = new PrintWriter(strWriter); + throwable.printStackTrace(pw); + pw.close(); + faultBuilder.setStackTrace(strWriter.toString()); + if (throwable.getCause()!=null) { + faultBuilder.setCauses(appendThrowableToCauses( + throwable.getCause(), new ArrayList())); + } + if (auditSupplementaryData!=null && !auditSupplementaryData.isEmpty()) { + faultBuilder.setSupplementaryData(auditSupplementaryData); + } + return faultBuilder.build(); + } + + protected static List appendThrowableToCauses(Throwable e, List causes) { + Cause.Builder causeBuilder = Cause.newBuilder(); + causeBuilder.setCode(e.getClass().getName()); + causeBuilder.setMessage(e.getMessage()); + causes.add(causeBuilder.build()); + if (e.getCause()!=null) { + return appendThrowableToCauses( + e.getCause(),causes); + } else { + return causes; + } + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java new file mode 100644 index 000000000..b6106044b --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java @@ -0,0 +1,98 @@ +package eu.dnetlib.dhp.common.java; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; 
+import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.lang3.StringUtils; + +/** + * + * @author Mateusz Kobos + * + */ +@SuppressWarnings("deprecation") +public final class CmdLineParser { + /** HACK: make the names of various types of parameters of the program + * more readable, e.g. "--Input_person=..." instead of "-Iperson=...", + * "--Output_merged=..." instead of "-Omerged=...". I wasn't able to + * get such notation so far using the Apache CLI. */ + public static final String constructorPrefix = "C"; + public static final String inputPrefix = "I"; + public static final String outputPrefix = "O"; + public static final String specialParametersPrefix = "S"; + /** HACK: This field should be removed since this list of special + * parameters is empty, thus not used anywhere.*/ + public static final String[] mandatorySpecialParameters = new String[]{}; + public static final String processParametersPrefix = "P"; + + // ------------------------- CONSTRUCTORS ------------------------------ + + private CmdLineParser() {} + + // ------------------------- LOGIC ------------------------------------- + + public static CommandLine parse(String[] args) { + Options options = new Options(); + @SuppressWarnings("static-access") + Option constructorParams = OptionBuilder.withArgName("STRING") + .hasArg() + .withDescription("Constructor parameter") + .withLongOpt("ConstructorParam") + .create(constructorPrefix); + options.addOption(constructorParams); + @SuppressWarnings("static-access") + Option inputs = OptionBuilder.withArgName("portName=URI") + .hasArgs(2) + .withValueSeparator() + .withDescription("Path binding for a given input port") + .withLongOpt("Input") + .create(inputPrefix); + options.addOption(inputs); + @SuppressWarnings("static-access") + Option outputs = OptionBuilder.withArgName("portName=URI") + .hasArgs(2) + .withValueSeparator() + .withDescription("Path 
binding for a given output port") + .create(outputPrefix); + options.addOption(outputs); + @SuppressWarnings("static-access") + Option specialParameter = OptionBuilder.withArgName("parameter_name=string") + .hasArgs(2) + .withValueSeparator() + .withDescription(String.format("Value of special parameter. " + + "These are the mandatory parameters={%s}", + StringUtils.join(mandatorySpecialParameters, ","))) + .create(specialParametersPrefix); + options.addOption(specialParameter); + @SuppressWarnings("static-access") + Option otherParameter = OptionBuilder.withArgName("parameter_name=string") + .hasArgs(2) + .withValueSeparator() + .withDescription( + String.format("Value of some other parameter.")) + .create(processParametersPrefix); + options.addOption(otherParameter); + + Option help = new Option("help", "print this message"); + options.addOption(help); + + CommandLineParser parser = new GnuParser(); + try { + CommandLine cmdLine = parser.parse(options, args); + if(cmdLine.hasOption("help")){ + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("", options ); + System.exit(1); + } + return cmdLine; + } catch (ParseException e) { + throw new CmdLineParserException("Parsing command line arguments failed", e); + } + + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java new file mode 100644 index 000000000..bbcad8d84 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java @@ -0,0 +1,21 @@ +package eu.dnetlib.dhp.common.java; + +/** + * Command line parsing exception + * @author Mateusz Kobos + * + */ +public class CmdLineParserException extends RuntimeException { + /** + * + */ + private static final long serialVersionUID = 9219928547611876284L; + + public CmdLineParserException(String message){ + super(message); + } + + public CmdLineParserException(String message, Throwable 
cause){ + super(message, cause); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java new file mode 100644 index 000000000..2c65d0892 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.common.java; + +import java.lang.reflect.Constructor; + +import org.apache.commons.cli.CommandLine; + +/** + * Handles parsing the command line arguments provided by the Oozie + * to create a {@link Process} + * @author Mateusz Kobos + * + */ +public class CmdLineParserForProcessConstruction { + public Process run(CommandLine cmdLine){ + String[] args = cmdLine.getArgs(); + if(args.length != 1){ + throw new CmdLineParserException("The name of the class has "+ + "to be specified as the first agrument"); + } + String className = args[0]; + + String[] constructorParams = cmdLine.getOptionValues( + CmdLineParser.constructorPrefix); + if(constructorParams == null){ + constructorParams = new String[0]; + } + try { + Class processClass = Class.forName(className); + Constructor processConstructor = null; + if(constructorParams.length == 0){ + try{ + processConstructor = processClass.getConstructor(); + return (Process) processConstructor.newInstance(); + } catch(NoSuchMethodException ex){ + } + } + processConstructor = processClass.getConstructor(String[].class); + return (Process) processConstructor.newInstance( + (Object)constructorParams); + } catch (Exception e) { + throw new CmdLineParserException(String.format( + "Problem while creating class \"%s\"", className), e); + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java new file mode 100644 index 000000000..31db33103 --- 
/dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java @@ -0,0 +1,100 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.cli.CommandLine; +import org.apache.hadoop.fs.Path; + +/** + * Handles parsing parameters passed to the {@link Process} + * @author Mateusz Kobos + * + */ +public class CmdLineParserForProcessRunParameters { + /** Parse the command line arguments. + * + * @param cmdLine command line arguments + * @param ports names of ports that ought to be extracted from command line + */ + public ProcessParameters run(CommandLine cmdLine, Ports ports) { + + Properties inputProperties = cmdLine.getOptionProperties( + CmdLineParser.inputPrefix); + assumePortNamesMatch(CmdLineParser.inputPrefix, inputProperties, + ports.getInput().keySet()); + Map inputBindings = getBindings( + inputProperties, ports.getInput().keySet()); + + Properties outputProperties = cmdLine.getOptionProperties( + CmdLineParser.outputPrefix); + assumePortNamesMatch(CmdLineParser.outputPrefix, outputProperties, + ports.getOutput().keySet()); + Map outputBindings = getBindings( + outputProperties, ports.getOutput().keySet()); + + PortBindings bindings = new PortBindings(inputBindings, outputBindings); + + Properties specialProperties = cmdLine.getOptionProperties( + CmdLineParser.specialParametersPrefix); + assumeContainAllMandatoryParameters( + specialProperties, CmdLineParser.mandatorySpecialParameters); + + Properties rawProperties = cmdLine.getOptionProperties( + CmdLineParser.processParametersPrefix); + Map processParameters = new HashMap(); + for(Entry entry: rawProperties.entrySet()){ + processParameters.put( + (String)entry.getKey(), (String)entry.getValue()); + } + + return new ProcessParameters(bindings, processParameters); + } + + private static void 
assumeContainAllMandatoryParameters( + Properties properties, String[] mandatoryParameters){ + for(String otherParameter: mandatoryParameters){ + if(!properties.containsKey(otherParameter)){ + throw new CmdLineParserException(String.format( + "Not all mandatory properties are set using the \"%s\" " + + "option are given, e.g. \"-%s\" parameter is missing", + CmdLineParser.specialParametersPrefix, otherParameter)); + } + } + } + + private static void assumePortNamesMatch(String cmdLineParamPrefix, + Properties cmdLineProperties, Set portNames) { + for (String name : portNames) { + if (!cmdLineProperties.containsKey(name)) { + throw new CmdLineParserException(String.format( + "The port with name \"%s\" is not specified in " + + "command line (command line option \"-%s\" is missing)", + name, cmdLineParamPrefix + name)); + } + } + for (Object cmdLineKeyObject : cmdLineProperties.keySet()) { + String name = (String) cmdLineKeyObject; + if (!portNames.contains(name)) { + throw new CmdLineParserException(String.format( + "A port name \"%s\" which is not specified is given " + + "in the command line " + + "(command line option \"%s\" is excess)", + name, cmdLineParamPrefix + name)); + } + } + } + + private static Map getBindings( + Properties cmdLineProperties, Set portNames) { + Map bindings = new HashMap(); + for (String name : portNames) { + Path path = new Path((String) cmdLineProperties.get(name)); + bindings.put(name, path); + } + return bindings; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java new file mode 100644 index 000000000..792002689 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import org.apache.commons.lang.NotImplementedException; +import org.apache.hadoop.fs.Path; + +/** + * Port names (see {@link Ports}) bound to 
certain paths in the file system + * @author Mateusz Kobos + * + */ +public class PortBindings { + private final Map input; + private final Map output; + + public PortBindings(Map input, Map output) { + this.input = input; + this.output = output; + } + + public Map getInput() { + return input; + } + + public Map getOutput() { + return output; + } + + @Override + public boolean equals(Object o){ + if(!(o instanceof PortBindings)){ + return false; + } + PortBindings other = (PortBindings) o; + return input.equals(other.input) && output.equals(other.output); + } + + @Override + public int hashCode(){ + throw new NotImplementedException(); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java new file mode 100644 index 000000000..165f25061 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java @@ -0,0 +1,27 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** + * A class that groups information about input and output ports, i.e. + * their (name of the port -> type of the port) mappings. + * @author Mateusz Kobos + */ +public class Ports { + private final Map input; + private final Map output; + + public Ports(Map input, Map output){ + this.input = input; + this.output = output; + } + + public Map getInput() { + return input; + } + public Map getOutput() { + return output; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java new file mode 100644 index 000000000..77e7b617a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; + +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** Workflow node written in Java. 
+ * + * The implementing class has to define a constructor with no parameters + * (possibly the default one) or a constructor with String[] as a single + * parameter. + * @author Mateusz Kobos + */ +public interface Process { + /** + * Run the process. + * + * The process ends with a success status if no exception is thrown, + * otherwise it ends with an error status. + * + * @param parameters parameters of the process. Each parameter + * corresponds to a single entry in the map, its name is the key, its + * value is the value. + * @throws Exception if thrown, it means that the process finished + * with an error status + */ + void run(PortBindings portBindings, Configuration conf, + Map parameters) throws Exception; + + /** + * @return map containing as the key: name of the port, as the value: type + * of the port + */ + Map getInputPorts(); + + /** + * @return map containing as the key: name of the port, as the value: type + * of the port + */ + Map getOutputPorts(); +} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java new file mode 100644 index 000000000..9d8d82779 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java @@ -0,0 +1,20 @@ +package eu.dnetlib.dhp.common.java; + +/** + * Process exception + * @author Dominika Tkaczyk + * + */ +public class ProcessException extends RuntimeException { + + private static final long serialVersionUID = 2758953138374438377L; + + public ProcessException(String message){ + super(message); + } + + public ProcessException(String message, Throwable cause){ + super(message, cause); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java new file mode 100644 index 000000000..33902dc20 --- /dev/null +++ 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import org.apache.commons.lang.NotImplementedException; + +/** + * Parameters of the Process retrieved from Oozie + * @author Mateusz Kobos + * + */ +public class ProcessParameters { + private final PortBindings portBindings; + private final Map parameters; + + public PortBindings getPortBindings() { + return portBindings; + } + + public Map getParameters(){ + return parameters; + } + + public ProcessParameters(PortBindings portBindings, + Map parameters) { + this.portBindings = portBindings; + this.parameters = parameters; + } + + @Override + public boolean equals(Object o){ + if(!(o instanceof ProcessParameters)){ + return false; + } + ProcessParameters other = (ProcessParameters) o; + return this.portBindings.equals(other.portBindings); + } + + @Override + public int hashCode(){ + throw new NotImplementedException(); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java new file mode 100644 index 000000000..084a521e6 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; + +/** + * {@link Process} related utility class. + * @author mhorst + * + */ +public final class ProcessUtils { + + // ------------- CONSTRUCTORS ---------------- + + private ProcessUtils() {} + + // ------------- LOGIC ----------------------- + + /** + * Returns parameter value retrived from parameters or context. 
+ * @param paramName + * @param hadoopConf + * @param parameters + * @return parameter value + */ + public static String getParameterValue(String paramName, + Configuration hadoopConf, + Map parameters) { + if (parameters!=null && !parameters.isEmpty()) { + String result = null; + result = parameters.get(paramName); + if (result!=null) { + return result; + } + } + if (hadoopConf!=null) { + return hadoopConf.get(paramName); + } else { + return null; + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java new file mode 100644 index 000000000..d60eb0cd9 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java @@ -0,0 +1,88 @@ +package eu.dnetlib.dhp.common.java; + +import java.io.IOException; +import java.util.Map; + +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericContainer; +import org.apache.commons.cli.CommandLine; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; + +import eu.dnetlib.dhp.common.java.io.DataStore; +import eu.dnetlib.dhp.common.java.io.FileSystemPath; +import eu.dnetlib.dhp.common.java.porttype.AvroPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** + * Creates {@link Process} object through reflection by parsing + * the command-line arguments + * @author Mateusz Kobos + * + */ +public class ProcessWrapper { + + public Configuration getConfiguration() throws Exception{ + return new Configuration(); + } + + public static void main(String[] args) throws Exception { + ProcessWrapper wrapper = new ProcessWrapper(); + wrapper.run(args); + } + + public void run(String[] args) throws Exception{ + CommandLine cmdLine = CmdLineParser.parse(args); + + CmdLineParserForProcessConstruction 
constructionParser = + new CmdLineParserForProcessConstruction(); + Process process = constructionParser.run(cmdLine); + Ports ports = + new Ports(process.getInputPorts(), process.getOutputPorts()); + CmdLineParserForProcessRunParameters runParametersParser = + new CmdLineParserForProcessRunParameters(); + ProcessParameters params = runParametersParser.run(cmdLine, ports); + Configuration conf = getConfiguration(); + process.run(params.getPortBindings(), conf, params.getParameters()); + createOutputsIfDontExist( + process.getOutputPorts(), params.getPortBindings().getOutput(), + conf); + } + + private static void createOutputsIfDontExist( + Map outputPortsSpecification, + Map outputPortBindings, Configuration conf) throws IOException{ + FileSystem fs = FileSystem.get(conf); + for(Map.Entry entry: outputPortBindings.entrySet()){ + Path path = entry.getValue(); + if(!fs.exists(path) || isEmptyDirectory(fs, path)){ + PortType rawType = outputPortsSpecification.get(entry.getKey()); + if(!(rawType instanceof AvroPortType)){ + throw new RuntimeException("The port \""+entry.getKey()+ + "\" is not of Avro type and only Avro types are "+ + "supported"); + } + AvroPortType type = (AvroPortType) rawType; + FileSystemPath fsPath = new FileSystemPath(fs, path); + DataFileWriter writer = + DataStore.create(fsPath, type.getSchema()); + writer.close(); + } + } + } + + private static boolean isEmptyDirectory(FileSystem fs, Path path) throws IOException{ + if(!fs.isDirectory(path)){ + return false; + } + RemoteIterator files = fs.listFiles(path, false); + /** There's at least one file, so the directory is not empty */ + if(files.hasNext()){ + return false; + } + return true; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java new file mode 100644 index 000000000..4d3618854 --- /dev/null +++ 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java @@ -0,0 +1,156 @@ +package eu.dnetlib.dhp.common.java.io; + +import java.io.IOException; +import java.util.NoSuchElementException; +import java.util.regex.Pattern; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.hadoop.fs.AvroFSInput; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.RemoteIterator; + + + +/** + * An abstraction over data store format which allows + * iterating over records stored in the data store. + * It handles the standard case of a data store that is a directory containing + * many Avro files (but it can also read records from a single file). + * + * @author mhorst + * @author Mateusz Kobos + */ +class AvroDataStoreReader implements CloseableIterator { + + private DataFileReader currentReader; + private RemoteIterator fileIterator; + private final FileSystemPath path; + private final Schema readerSchema; + + /** + * Ignore file starting with underscore. Such files are also ignored by + * default by map-reduce jobs. + */ + private final Pattern whitelistPattern = Pattern.compile("^(?!_).*"); + + /** + * Here the schema used for reading the data store is set to be the same + * as the one that was used to write it. 
+ */ + public AvroDataStoreReader(final FileSystemPath path) + throws IOException { + this(path, null); + } + + /** + * @param path path to the data store to be read + * @param readerSchema the schema onto which the read data store will + * be projected + */ + public AvroDataStoreReader(final FileSystemPath path, Schema readerSchema) + throws IOException { + this.path = path; + this.readerSchema = readerSchema; + fileIterator = path.getFileSystem().listFiles(path.getPath(), false); + currentReader = getNextNonemptyReader(); + } + + private DataFileReader getNextNonemptyReader() throws IOException { + while (fileIterator != null && fileIterator.hasNext()) { + LocatedFileStatus currentFileStatus = fileIterator.next(); + if (isValidFile(currentFileStatus)) { + FileSystemPath currPath = new FileSystemPath( + path.getFileSystem(), currentFileStatus.getPath()); + DataFileReader reader = + getSingleFileReader(currPath, readerSchema); + /** Check if the file contains at least one record */ + if(reader.hasNext()){ + return reader; + } else { + reader.close(); + } + } + } + /** fallback */ + return null; + } + + /** + * Get a reader for the specified Avro file. A utility function. + * @param path path to the existing file + * @param readerSchema optional reader schema. If you want to use the + * default option of using writer schema as the reader schema, pass the + * {@code null} value. 
+ * @throws IOException + */ + private static DataFileReader getSingleFileReader( + FileSystemPath path, Schema readerSchema) throws IOException{ + try{ + SpecificDatumReader datumReader = new SpecificDatumReader(); + if(readerSchema != null){ + datumReader.setExpected(readerSchema); + } + long len = path.getFileSystem().getFileStatus(path.getPath()).getLen(); + FSDataInputStream inputStream = path.getFileSystem().open(path.getPath()); + return new DataFileReader( + new AvroFSInput(inputStream, len), datumReader); + } catch (IOException ex){ + throw new IOException("Problem with file \""+ + path.getPath().toString()+"\": "+ex.getMessage(), ex); + } + } + + /** + * Checks whether file is valid + * + * @param fileStatus + * @return true when valid, false otherwise + */ + private boolean isValidFile(LocatedFileStatus fileStatus) { + if (fileStatus.isFile()) { + return whitelistPattern.matcher( + fileStatus.getPath().getName()).matches(); + } + /** fallback */ + return false; + } + + @Override + public boolean hasNext() { + return currentReader != null; + } + + @Override + public T next(){ + if(currentReader == null){ + throw new NoSuchElementException(); + } + T obj = currentReader.next(); + if(!currentReader.hasNext()){ + try{ + currentReader.close(); + currentReader = getNextNonemptyReader(); + } catch(IOException ex){ + throw new RuntimeException(ex); + } + } + return obj; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public void close() throws IOException { + if(currentReader != null){ + currentReader.close(); + currentReader = null; + } + fileIterator = null; + } +} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java new file mode 100644 index 000000000..1fc77832e --- /dev/null +++ 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java @@ -0,0 +1,25 @@ +package eu.dnetlib.dhp.common.java.io; + +import java.io.Closeable; +import java.util.Iterator; + +/** + * An iterator for I/O operations that can be {@code close}d explicitly to + * release the resources it holds. + * + * You should call {@code close} only when interrupting the iteration in the + * middle since in such situation there is no way for the iterator to know if + * you're going to continue the iteration and it should still hold the resources + * or not. There's no need to call {@code close} when iterating over all + * elements since in such situation it is called automatically after the + * end of iteration. + * + * @author mhorst + * + * @param + */ +public interface CloseableIterator extends Iterator, Closeable { + + +} + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java new file mode 100644 index 000000000..a08f975e1 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java @@ -0,0 +1,19 @@ +package eu.dnetlib.dhp.common.java.io; + +import java.util.Iterator; + +/** + * Counting iterator providing total number of results. + * @author mhorst + * + * @param + */ +public interface CountingIterator extends Iterator { + + /** + * Provides total number of results to be iterating on. 
+ * @return total number of results to be iterating on + */ + int getCount(); + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java new file mode 100644 index 000000000..aa66b6f51 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java @@ -0,0 +1,172 @@ +package eu.dnetlib.dhp.common.java.io; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericContainer; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.specific.SpecificDatumWriter; + + +/** + * Utility for accessing to Avro-based data stores stored in file system + * @author Mateusz Kobos + * + */ +public final class DataStore { + + private final static String singleDataStoreFileName = "content.avro"; + + private static final int FILE_NO_PADDING_LENGTH = 7; + + private DataStore(){} + + /** + * Create a new data store directory with single file and return writer that allows + * adding new records + * @param path path to a directory to be created + * @param schema schema of the records to be stored in the file + * @return + * @throws IOException + */ + public static DataFileWriter create( + FileSystemPath path, Schema schema) throws IOException{ + return create(path, schema, singleDataStoreFileName); + } + + + /** + * Create a new data store directory and return writer that allows + * adding new records + * @param path path to a directory to be created + * @param schema schema of the records to be stored in the file + * @param dataStoreFileName datastore file name + * @return + * @throws IOException + */ + public static DataFileWriter create( + FileSystemPath path, Schema schema, String dataStoreFileName) throws IOException{ + path.getFileSystem().mkdirs(path.getPath()); + FileSystemPath outFile = new FileSystemPath( 
+ path, dataStoreFileName); + return DataStore.createSingleFile(outFile, schema); + } + + /** + * Get reader for reading records from given data store + * + * Here the schema used for reading the data store is set to be the same + * as the one that was used to write it. + * + * @see getReader(FileSystemPath path, Schema readerSchema) for details. + * + */ + public static CloseableIterator getReader(FileSystemPath path) + throws IOException{ + return getReader(path, null); + } + + /** + * Get reader for reading records from given data store + * @param path path to a directory corresponding to data store + * @param readerSchema the schema onto which the read data store will + * be projected + */ + public static CloseableIterator getReader( + FileSystemPath path, Schema readerSchema) throws IOException{ + return new AvroDataStoreReader(path, readerSchema); + } + + /** + * Read data store entries and insert them into a list. A utility function. + * + * Here the schema used for reading the data store is set to be the same + * as the one that was used to write it. + */ + public static List read(FileSystemPath path) + throws IOException{ + return read(path, null); + } + + /** + * Read data store entries and insert them into a list. A utility function. + * + * @param readerSchema the schema onto which the read data store will + * be projected + */ + public static List read(FileSystemPath path, Schema readerSchema) + throws IOException{ + CloseableIterator iterator = getReader(path, readerSchema); + List elems = new ArrayList(); + while(iterator.hasNext()){ + elems.add(iterator.next()); + } + return elems; + } + + /** + * Create a data store from a list of entries. A utility function. + * The schema is implicitly + * taken from the first element from the {@code elements} list. + * @param elements list of elements to write. At least one element has + * to be present, because it is used to retrieve schema of the + * structures passed in the list. 
+ */ + public static void create( + List elements, FileSystemPath path) throws IOException{ + if(elements.isEmpty()){ + throw new IllegalArgumentException( + "The list of elements has to be non-empty"); + } + Schema schema = elements.get(0).getSchema(); + create(elements, path, schema); + } + + /** + * Create a data store from a list of entries with schema given explicitly. + * A utility function. + */ + public static void create( + List elements, FileSystemPath path, Schema schema) + throws IOException{ + DataFileWriter writer = create(path, schema); + try{ + for(T i: elements){ + writer.append(i); + } + } finally { + if(writer != null){ + writer.close(); + } + } + } + + /** + * Create a single Avro file. This method shouldn't be normally used to + * create data stores since it creates only a single Avro file, + * while a data store consists of a directory containing one or more files. + */ + public static DataFileWriter createSingleFile( + FileSystemPath path, Schema schema) throws IOException{ + DatumWriter datumWriter = new SpecificDatumWriter(); + DataFileWriter writer = new DataFileWriter(datumWriter); + writer.create(schema, path.getFileSystem().create(path.getPath())); + return writer; + } + + /** + * Generates filename for given file number. + * @param fileNo file sequence number + */ + public static String generateFileName(int fileNo) { + StringBuffer strBuff = new StringBuffer(String.valueOf(fileNo)); + while(strBuff.length() { + + private SequenceFile.Reader sequenceReader; + + private final RemoteIterator fileIt; + + private final FileSystem fs; + + /** + * Ignore file starting with underscore. Such files are also ignored by + * default by map-reduce jobs. + */ + private final static Pattern WHITELIST_REGEXP = Pattern.compile("^[^_].*"); + + private Text toBeReturned; + + //------------------------ CONSTRUCTORS -------------------------- + + /** + * Default constructor. 
+ * + * @param path HDFS path along with associated FileSystem + * @throws IOException + */ + public SequenceFileTextValueReader(final FileSystemPath path) throws IOException { + this.fs = path.getFileSystem(); + if (fs.isDirectory(path.getPath())) { + fileIt = fs.listFiles(path.getPath(), false); + sequenceReader = getNextSequenceReader(); + } else { + fileIt = null; + sequenceReader = new Reader(fs.getConf(), SequenceFile.Reader.file(path.getPath())); + } + } + + //------------------------ LOGIC --------------------------------- + + /* + * (non-Javadoc) + * + * @see java.util.Iterator#hasNext() + */ + @Override + public boolean hasNext() { + // check and provide next when already returned + if (toBeReturned == null) { + toBeReturned = getNext(); + } + return toBeReturned != null; + } + + /* + * (non-Javadoc) + * + * @see java.util.Iterator#next() + */ + @Override + public Text next() { + if (toBeReturned != null) { + // element fetched while executing hasNext() + Text result = toBeReturned; + toBeReturned = null; + return result; + } else { + Text resultCandidate = getNext(); + if (resultCandidate!=null) { + return resultCandidate; + } else { + throw new NoSuchElementException(); + } + } + } + + /* + * (non-Javadoc) + * + * @see eu.dnetlib.dhp.exp.iterator.ClosableIterator#close() + */ + @Override + public void close() throws IOException { + if (sequenceReader != null) { + sequenceReader.close(); + } + } + + //------------------------ PRIVATE ------------------------------- + + private final Reader getNextSequenceReader() throws IOException { + while (fileIt != null && fileIt.hasNext()) { + LocatedFileStatus currentFileStatus = fileIt.next(); + if (isValidFile(currentFileStatus)) { + return new Reader(this.fs.getConf(), SequenceFile.Reader.file(currentFileStatus.getPath())); + } + } + // fallback + return null; + } + + /** + * Checks whether file is valid candidate. 
+ * + * @param fileStatus + * file status holding file name + * @return true when valid, false otherwise + */ + private final boolean isValidFile(LocatedFileStatus fileStatus) { + if (fileStatus.isFile()) { + return WHITELIST_REGEXP.matcher(fileStatus.getPath().getName()).matches(); + } else { + return false; + } + } + + /** + * @return next data package + */ + private Text getNext() { + try { + if (sequenceReader == null) { + return null; + } + Writable key = (Writable) ReflectionUtils.newInstance(sequenceReader.getKeyClass(), fs.getConf()); + Writable value = (Writable) ReflectionUtils.newInstance(sequenceReader.getValueClass(), fs.getConf()); + if (sequenceReader.next(key, value)) { + return (Text) value; + } else { + sequenceReader.close(); + sequenceReader = getNextSequenceReader(); + if (sequenceReader != null) { + return getNext(); + } + } + // fallback + return null; + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java new file mode 100644 index 000000000..00a071ac9 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java @@ -0,0 +1,54 @@ +package eu.dnetlib.dhp.common.java.jsonworkflownodes; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; + +import com.google.common.base.Preconditions; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** + * Utility class responsible for copying resources available on classpath to specified HDFS 
location. + * @author mhorst + * + */ +public class ClassPathResourceToHdfsCopier implements Process { + + private static final String PARAM_INPUT_CLASSPATH_RESOURCE = "inputClasspathResource"; + + private static final String PARAM_OUTPUT_HDFS_FILE_LOCATION = "outputHdfsFileLocation"; + + @Override + public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { + Preconditions.checkNotNull(parameters.get(PARAM_INPUT_CLASSPATH_RESOURCE), PARAM_INPUT_CLASSPATH_RESOURCE + " parameter was not specified!"); + Preconditions.checkNotNull(parameters.get(PARAM_OUTPUT_HDFS_FILE_LOCATION), PARAM_OUTPUT_HDFS_FILE_LOCATION + " parameter was not specified!"); + + FileSystem fs = FileSystem.get(conf); + + try (InputStream in = Thread.currentThread().getContextClassLoader() + .getResourceAsStream(parameters.get(PARAM_INPUT_CLASSPATH_RESOURCE)); + OutputStream os = fs.create(new Path(parameters.get(PARAM_OUTPUT_HDFS_FILE_LOCATION)))) { + IOUtils.copyBytes(in, os, 4096, false); + } + } + + @Override + public Map getInputPorts() { + return new HashMap(); + } + + @Override + public Map getOutputPorts() { + return new HashMap(); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java new file mode 100644 index 000000000..9de33809a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java @@ -0,0 +1,66 @@ +package eu.dnetlib.dhp.common.java.jsonworkflownodes; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.avro.Schema; + +import eu.dnetlib.dhp.common.java.jsonworkflownodes.StringPortSpecificationExtractor.PortSpecification; +import eu.dnetlib.dhp.common.java.porttype.AvroPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import eu.dnetlib.dhp.common.utils.AvroUtils; + +/** + * @author Mateusz Kobos + */ 
+public class PortSpecifications { + private static final String[] propertyRegexps = + new String[]{"[\\w\\.]+", "[\\w\\./_\\-]+"}; + private final Map specs; + + public static class SpecificationValues { + + private final Schema schema; + + private final String jsonFilePath; + + public SpecificationValues(Schema schema, String jsonFilePath) { + this.schema = schema; + this.jsonFilePath = jsonFilePath; + } + + public Schema getSchema() { + return schema; + } + + public String getJsonFilePath() { + return jsonFilePath; + } + + } + + public PortSpecifications(String[] portSpecifications){ + StringPortSpecificationExtractor portSpecExtractor = + new StringPortSpecificationExtractor(propertyRegexps); + specs = new HashMap(); + for(int i = 0; i < portSpecifications.length; i++){ + PortSpecification portSpec = portSpecExtractor.getSpecification(portSpecifications[i]); + Schema schema = AvroUtils.toSchema(portSpec.getProperties()[0]); + String jsonPath = portSpec.getProperties()[1]; + specs.put(portSpec.getName(), new SpecificationValues(schema, jsonPath)); + } + } + + public SpecificationValues get(String portName){ + return specs.get(portName); + } + + public Map getPortTypes(){ + Map ports = new HashMap(); + for(Map.Entry e: specs.entrySet()){ + Schema schema = e.getValue().schema; + ports.put(e.getKey(), new AvroPortType(schema)); + } + return ports; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java new file mode 100644 index 000000000..0b10b6805 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java @@ -0,0 +1,89 @@ +package eu.dnetlib.dhp.common.java.jsonworkflownodes; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Extracts information about port name and its properties from a 
string + * of a form "{port_name, property_1, property_2, ...}" + * @author Mateusz Kobos + */ +public class StringPortSpecificationExtractor { + private final String[] propertiesRegexp; + private final String portSpecificationRegexp; + private final Pattern pattern; + + public static class PortSpecification { + + private final String name; + + private final String[] properties; + + public PortSpecification(String name, String[] properties) { + this.name = name; + this.properties = properties; + } + + public String getName() { + return name; + } + + public String[] getProperties() { + return properties; + } + } + + /** + * @param propertiesRegexp regular expressions specifying pattern for + * each of the properties associated with a port. An example of a single + * specification: {@code "[\\w\\.]+"}. + */ + public StringPortSpecificationExtractor(String[] propertiesRegexp){ + this.propertiesRegexp = propertiesRegexp; + this.portSpecificationRegexp = createRegexpString("[\\w\\._]+", propertiesRegexp); + this.pattern = Pattern.compile(this.portSpecificationRegexp); + } + + private static String createRegexpString(String portNameRegexp, String[] propertiesRegexp){ + StringBuilder regexp = new StringBuilder(); + regexp.append("s*\\{\\s*"); + regexp.append("("+portNameRegexp+")"); + for(String propertyRegexp: propertiesRegexp){ + regexp.append(",\\s*("+propertyRegexp+")"); + } + regexp.append("\\s*\\}\\s*"); + return regexp.toString(); + } + + private int getPropertiesCount(){ + return propertiesRegexp.length; + } + + public PortSpecification getSpecification(String text){ + Matcher m = pattern.matcher(text); + if(!m.matches()){ + throw new RuntimeException(String.format("Specification of " + + "the port (\"%s\") does not match regexp \"%s\"", + text, portSpecificationRegexp)); + } + final int expectedGroupsCount = getPropertiesCount()+1; + if(m.groupCount() != expectedGroupsCount){ + StringBuilder groups = new StringBuilder(); + for(int i = 0; i < m.groupCount(); i++){ 
+ groups.append("\""+m.group(i)+"\""); + if(i != m.groupCount()-1) { + groups.append(", "); + } + } + throw new RuntimeException(String.format( + "Invalid output port specification \"%s\": got %d groups "+ + "instead of %d (namely: %s)", text, m.groupCount(), + expectedGroupsCount, groups.toString())); + } + String[] properties = new String[getPropertiesCount()]; + for(int i = 0; i < getPropertiesCount(); i++){ + properties[i] = m.group(i+2); + } + return new PortSpecification(m.group(1), properties); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java new file mode 100644 index 000000000..57bda6ed7 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java @@ -0,0 +1,20 @@ +package eu.dnetlib.dhp.common.java.porttype; + +/** + * A port type that accepts any type of data + * @author Mateusz Kobos + * + */ +public class AnyPortType implements PortType { + + @Override + public String getName() { + return "Any"; + } + + @Override + public boolean accepts(PortType other) { + return true; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java new file mode 100644 index 000000000..47a57164e --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java @@ -0,0 +1,65 @@ +package eu.dnetlib.dhp.common.java.porttype; + +import org.apache.avro.Schema; +import org.apache.commons.lang.NotImplementedException; + +/** + * This port type accepts data stores in a format of Avro + * Object Container Files, i.e. Avro data files. + * This kind of file corresponds to a list of objects, each one being of the + * same type, i.e. each one is defined by the same Avro schema. 
+ * @author Mateusz Kobos + */ +public class AvroPortType implements PortType { + + private final Schema schema; + + + public AvroPortType(Schema schema) { + this.schema = schema; + } + + @Override + public String getName() { + return schema.getFullName(); + } + + @Override + /** Simple check if the port types are exactly the same + * (as defined by the {@code equals} method). + * + * TODO: this should work in a more relaxed way - + * {@code this.accepts(other)} should be true if {@code this} + * describes a subset of structures defined in {@code other}. To be + * more precise: the JSON schema tree defined by {@code this} should + * form a sub-tree of the JSON schema tree defined by {@code other}. */ + public boolean accepts(PortType other) { + return this.equals(other); + } + + /** + * Two patterns are equal if their schemas are the same. + */ + @Override + public boolean equals(Object o){ + if(!(o instanceof AvroPortType)){ + return false; + } + AvroPortType other = (AvroPortType) o; + return this.schema.equals(other.schema); + } + + @Override + public int hashCode(){ + return schema.hashCode(); // consistent with equals(), which compares schemas; throwing here broke the hashCode contract + } + + /** + * Returns avro schema. + * @return avro schema + */ + public Schema getSchema() { + return schema; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java new file mode 100644 index 000000000..5ae2d48c2 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java @@ -0,0 +1,33 @@ +package eu.dnetlib.dhp.common.java.porttype; + +/** + * Type of the port. This is used to specify what kind of data is + * accepted on a certain input port or produced on a certain output port + * of a workflow node. 
+ * + * @author Mateusz Kobos + * + */ +public interface PortType { + + String getName(); + + /** + * This should be used to check whether data produced by a workflow node + * conforms to the data consumed by other workflow node. + * In a scenario when A produces certain data on a port p and B consumes + * this data on a port q, type(q).accepts(type(p)) has to be true. + * + * @return {@code true} if {@code this} port type is a more general + * version of the {@code other} port type, + * or as an alternative explanation: {@code other} is a subset of + * {@code this}, i.e. {@code other} has at least all the properties present + * in {@code this} (and possibly some others). This is analogous to a + * situation in object-oriented programming, where in order for assignment + * operation {@code this = other} to work, the type of {@code this} has to + * accept type of {@code other}, or in other words {@code other} has to + * inherit from {@code this}, or in yet other words: {@code other} has to + * conform to {@code this}. 
+ */ + boolean accepts(PortType other); +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java new file mode 100644 index 000000000..d5d38fe29 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java @@ -0,0 +1,149 @@ +package eu.dnetlib.dhp.common.lock; + +import java.security.InvalidParameterException; +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.Semaphore; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.ZKFailoverController; +import org.apache.log4j.Logger; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.Watcher.Event; +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.ZooKeeper; + +import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** + * Zookeeper lock managing process. Blocks until lock is released. 
+ * + * @author mhorst + * + */ +public class LockManagingProcess implements eu.dnetlib.dhp.common.java.Process { + + public static final String DEFAULT_ROOT_NODE = "/cache"; + + public static final String NODE_SEPARATOR = "/"; + + public static final String PARAM_ZK_SESSION_TIMEOUT = "zk_session_timeout"; + + public static final String PARAM_NODE_ID = "node_id"; + + public static final String PARAM_LOCK_MODE = "mode"; + + public static enum LockMode { + obtain, + release + } + + public static final int DEFAULT_SESSION_TIMEOUT = 60000; + + public static final Logger log = Logger.getLogger(LockManagingProcess.class); + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return Collections.emptyMap(); + } + + @Override + public void run(PortBindings portBindings, Configuration conf, + Map parameters) throws Exception { + + Preconditions.checkArgument(parameters.containsKey(PARAM_NODE_ID), "node id not provided!"); + Preconditions.checkArgument(parameters.containsKey(PARAM_LOCK_MODE), "lock mode not provided!"); + + String zkConnectionString = conf.get(ZKFailoverController.ZK_QUORUM_KEY); + Preconditions.checkArgument(StringUtils.isNotBlank(zkConnectionString), + "zookeeper quorum is unknown, invalid '%s' property value: %s", ZKFailoverController.ZK_QUORUM_KEY, zkConnectionString); + + int sessionTimeout = parameters.containsKey(PARAM_ZK_SESSION_TIMEOUT)? 
+ Integer.valueOf(parameters.get(PARAM_ZK_SESSION_TIMEOUT)) : DEFAULT_SESSION_TIMEOUT; + + final ZooKeeper zooKeeper = new ZooKeeper(zkConnectionString, sessionTimeout, (e) -> { + // we are not interested in generic events + }); + +// initializing root node if does not exist + if (zooKeeper.exists(DEFAULT_ROOT_NODE, false) == null) { + log.info("initializing root node: " + DEFAULT_ROOT_NODE); + zooKeeper.create(DEFAULT_ROOT_NODE, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + log.info("root node initialized"); + } + + final String nodePath = generatePath(parameters.get(PARAM_NODE_ID), DEFAULT_ROOT_NODE); + + final Semaphore semaphore = new Semaphore(1); + semaphore.acquire(); + + switch(LockMode.valueOf(parameters.get(PARAM_LOCK_MODE))) { + case obtain: { + obtain(zooKeeper, nodePath, semaphore); + break; + } + case release: { + release(zooKeeper, nodePath); + break; + } + default: { + throw new InvalidParameterException("unsupported lock mode: " + parameters.get(PARAM_LOCK_MODE)); + } + } + } + + // ------------------------- PRIVATE -------------------------- + + private void obtain(final ZooKeeper zooKeeper, final String nodePath, final Semaphore semaphore) throws KeeperException, InterruptedException { + log.info("trying to obtain lock: " + nodePath); + if (zooKeeper.exists(nodePath, (event) -> { + if (Event.EventType.NodeDeleted == event.getType()) { + try { + log.info(nodePath + " lock release detected"); + log.info("creating new lock instance: " + nodePath + "..."); + zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + log.info("lock" + nodePath + " created"); + semaphore.release(); + } catch (KeeperException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }) == null) { + log.info("lock not found, creating new lock instance: " + nodePath); + zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, 
CreateMode.PERSISTENT); + log.info("lock" + nodePath + " created"); + semaphore.release(); + } else { + // waiting until node is removed by other lock manager + log.info("waiting until lock is released"); + Stopwatch timer = new Stopwatch().start(); + semaphore.acquire(); + log.info("lock released, waited for " + timer.elapsedMillis() + " ms"); + semaphore.release(); + } + } + + private void release(final ZooKeeper zooKeeper, final String nodePath) throws InterruptedException, KeeperException { + log.info("removing lock" + nodePath + "..."); + zooKeeper.delete(nodePath, -1); + log.info("lock" + nodePath + " removed"); + } + + private static final String generatePath(String nodeId, String rootNode) { + return rootNode + NODE_SEPARATOR + nodeId.replace('/', '_'); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java new file mode 100644 index 000000000..71599277a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java @@ -0,0 +1,24 @@ +package eu.dnetlib.dhp.common.oozie; + +import org.apache.oozie.client.OozieClient; + +/** + * Factory of {@link OozieClient} + * + * @author madryk + */ +public class OozieClientFactory { + + + //------------------------ LOGIC -------------------------- + + /** + * Returns {@link OozieClient} object used for communication with oozie + */ + public OozieClient createOozieClient(String oozieUrl) { + + OozieClient oozieClient = new OozieClient(oozieUrl); + + return oozieClient; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java new file mode 100644 index 000000000..6b7fe2975 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java @@ -0,0 +1,76 @@ +package 
eu.dnetlib.dhp.common.oozie.property; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.Collections; +import java.util.Map; +import java.util.Properties; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import org.apache.hadoop.conf.Configuration; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; + +/** + * This process is a solution for setting dynamic properties in oozie workflow definition. + * + * Expects three parameters to be provided: the first 'condition' parameter is boolean value + * based on which either first 'inCaseOfTrue' or second 'elseCase' parameter value is set as + * the 'result' property. + * + * This can be understood as the: + * + * condition ? inCaseOfTrue : elseCase + * + * java syntax equivalent. + * + * @author mhorst + * + */ +public class ConditionalPropertySetter implements Process { + + public static final String PARAM_CONDITION = "condition"; + public static final String PARAM_INCASEOFTRUE = "inCaseOfTrue"; + public static final String PARAM_ELSECASE = "elseCase"; + + public static final String OUTPUT_PROPERTY_RESULT = "result"; + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return Collections.emptyMap(); + } + + @Override + public void run(PortBindings portBindings, Configuration conf, + Map parameters) throws Exception { + + String condition = parameters.get(PARAM_CONDITION); + if (condition == null) { + throw new RuntimeException("unable to make decision: " + + PARAM_CONDITION + " parameter was not set!"); + } + + Properties props = new Properties(); + props.setProperty(OUTPUT_PROPERTY_RESULT, + Boolean.parseBoolean(condition)? 
+ parameters.get(PARAM_INCASEOFTRUE): + parameters.get(PARAM_ELSECASE)); + OutputStream os = new FileOutputStream( + new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME))); + try { + props.store(os, ""); + } finally { + os.close(); + } + + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java new file mode 100644 index 000000000..78da47f12 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java @@ -0,0 +1,12 @@ +package eu.dnetlib.dhp.common.protobuf; + +import com.google.protobuf.Message; +import org.apache.avro.generic.IndexedRecord; + +/** + * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl) + */ +public interface AvroToProtoBufConverter { + String convertIntoKey(IN datum); + OUT convertIntoValue(IN datum); +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java new file mode 100644 index 000000000..00a318aff --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java @@ -0,0 +1,62 @@ +package eu.dnetlib.dhp.common.protobuf; + +import com.google.protobuf.Message; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.mapred.AvroKey; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.log4j.Logger; + +import java.io.IOException; + +/** + * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl) + */ +public class AvroToProtoBufOneToOneMapper + extends Mapper, NullWritable, Text, BytesWritable> { + private static final String CONVERTER_CLASS_PROPERTY = "converter_class"; + private static final Logger log = 
Logger.getLogger(AvroToProtoBufOneToOneMapper.class); + + private final Text keyWritable = new Text(); + private final BytesWritable valueWritable = new BytesWritable(); + private AvroToProtoBufConverter converter; + + @SuppressWarnings("unchecked") + @Override + public void setup(Context context) throws IOException, InterruptedException { + Class converterClass = context.getConfiguration().getClass(CONVERTER_CLASS_PROPERTY, null); + + if (converterClass == null) { + throw new IOException("Please specify " + CONVERTER_CLASS_PROPERTY); + } + + try { + converter = (AvroToProtoBufConverter) converterClass.newInstance(); + } catch (ClassCastException e) { + throw new IOException( + "Class specified in " + CONVERTER_CLASS_PROPERTY + " doesn't implement AvroToProtoBufConverter", e); + } catch (Exception e) { + throw new IOException( + "Could not instantiate specified AvroToProtoBufConverter class, " + converterClass, e); + } + } + + @Override + public void map(AvroKey avro, NullWritable ignore, Context context) + throws IOException, InterruptedException { + String key = null; + try { + key = converter.convertIntoKey(avro.datum()); + keyWritable.set(key); + + byte[] value = converter.convertIntoValue(avro.datum()).toByteArray(); + valueWritable.set(value, 0, value.length); + + context.write(keyWritable, valueWritable); + } catch (Exception e) { + log.error("Error" + (key != null ? " while processing " + key : ""), e); + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java new file mode 100644 index 000000000..3c301fa8d --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java @@ -0,0 +1,32 @@ +package eu.dnetlib.dhp.common.report; + +import eu.dnetlib.dhp.common.schemas.ReportEntry; +import eu.dnetlib.dhp.common.schemas.ReportEntryType; + +/** + * Factory of {@link ReportEntry} objects. 
+ * + * @author madryk + */ +public final class ReportEntryFactory { + + // ----------------------- CONSTRUCTORS ----------------------------- + + private ReportEntryFactory() {} + + // ----------------------- LOGIC ------------------------------------ + + /** + * Creates {@link ReportEntry} with {@link ReportEntryType#COUNTER} type + */ + public static ReportEntry createCounterReportEntry(String key, long count) { + return new ReportEntry(key, ReportEntryType.COUNTER, String.valueOf(count)); + } + + /** + * Creates {@link ReportEntry} with {@link ReportEntryType#DURATION} type + */ + public static ReportEntry createDurationReportEntry(String key, long duration) { + return new ReportEntry(key, ReportEntryType.DURATION, String.valueOf(duration)); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java new file mode 100644 index 000000000..1ed3dfb08 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java @@ -0,0 +1,110 @@ +package eu.dnetlib.dhp.common.report; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.google.common.collect.Lists; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.io.DataStore; +import eu.dnetlib.dhp.common.java.io.FileSystemPath; +import eu.dnetlib.dhp.common.java.porttype.AvroPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import eu.dnetlib.dhp.common.schemas.ReportEntry; + +/** + * Java workflow node process for building report.
+ * It writes report properties into avro datastore of {@link ReportEntry}s + * with location specified in output port.
+ * Report property name must start with report. to + * be included in output datastore. + * + * Usage example:
+ *
+ * {@code
+ * 
+ *     
+ *         eu.dnetlib.dhp.common.java.ProcessWrapper
+ *         eu.dnetlib.dhp.common.report.ReportGenerator
+ *         -Preport.someProperty=someValue
+ *         -Oreport=/report/path
+ *     
+ *     ...
+ * 
+ * }
+ * 
+ * Above example will produce avro datastore in /report/path + * with single {@link ReportEntry}. + * Where the {@link ReportEntry#getKey()} will be equal to someProperty and + * the {@link ReportEntry#getValue()} will be equal to someValue + * (notice the stripped report. prefix from the entry key). + * + * + * @author madryk + * + */ +public class ReportGenerator implements Process { + + private static final String REPORT_PORT_OUT_NAME = "report"; + + private static final String REPORT_PROPERTY_PREFIX = "report."; + + + //------------------------ LOGIC -------------------------- + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return Collections.singletonMap(REPORT_PORT_OUT_NAME, new AvroPortType(ReportEntry.SCHEMA$)); + } + + @Override + public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { + + Map entriesToReport = collectEntriesToReport(parameters); + + List avroReport = convertToAvroReport(entriesToReport); + + + FileSystem fs = FileSystem.get(conf); + + Path reportPath = portBindings.getOutput().get(REPORT_PORT_OUT_NAME); + + DataStore.create(avroReport, new FileSystemPath(fs, reportPath)); + + } + + + //------------------------ PRIVATE -------------------------- + + private Map collectEntriesToReport(Map parameters) { + + return parameters.entrySet().stream() + .filter(property -> property.getKey().startsWith(REPORT_PROPERTY_PREFIX)) + .map(x -> Pair.of(x.getKey().substring(REPORT_PROPERTY_PREFIX.length()), x.getValue())) + .collect(Collectors.toMap(e -> e.getLeft(), e -> e.getRight())); + + } + + private List convertToAvroReport(Map entriesToReport) { + + List avroReport = Lists.newArrayList(); + entriesToReport.forEach((key, value) -> avroReport.add(ReportEntryFactory.createCounterReportEntry(key, Long.valueOf(value)))); + + return avroReport; + } + + +} diff --git 
a/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java new file mode 100644 index 000000000..33b7f788c --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java @@ -0,0 +1,74 @@ +package eu.dnetlib.dhp.common.spark.pipe; + +import java.io.Serializable; + +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.mapred.AvroKey; +import org.apache.hadoop.io.NullWritable; +import org.apache.spark.SparkFiles; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; + +import eu.dnetlib.dhp.common.utils.AvroGsonFactory; +import scala.Tuple2; + + +/** + * Executor of mapreduce scripts using spark pipes. + * It imitates hadoop streaming behavior. + * + * @author madryk + * + */ +public class SparkPipeExecutor implements Serializable { + + private static final long serialVersionUID = 1L; + + + //------------------------ LOGIC -------------------------- + + /** + * Imitates map part of hadoop streaming job. + * It executes provided script for every key in inputRecords rdd. + *

+ * It is assumed that provided script will read records from standard input (one line for one record) + * and write mapped record into standard output (also one line for one record). + * Mapped record can be a key/value pair. In that case script should return key and value + * splitted by tab (\t) character in single line. + */ + public JavaPairRDD doMap(JavaPairRDD, NullWritable> inputRecords, String scriptName, String args) { + + JavaRDD mappedRecords = inputRecords.keys().pipe("python " + SparkFiles.get(scriptName) + " " + args); + + JavaPairRDD outputRecords = mappedRecords + .mapToPair(line -> { + String[] splittedPair = line.split("\t"); + return new Tuple2(splittedPair[0], (splittedPair.length == 1) ? null : splittedPair[1]); + }); + + return outputRecords; + } + + /** + * Imitates reduce part of hadoop streaming job. + *

+ * It is assumed that provided script will read records from standard input (one line for one record) + * and group records with the same key into single record (reduce). + * Method assures that all input records with the same key will be transfered in adjacent lines. + * Reduced records should be written by script into standard output (one line for one record). + * Reduced records must be json strings of class provided as argument. + */ + public JavaPairRDD, NullWritable> doReduce(JavaPairRDD inputRecords, String scriptName, String args, Class outputClass) { + + JavaRDD reducedRecords = inputRecords.sortByKey() + .map(record -> record._1 + ((record._2 == null) ? "" : ("\t" + record._2))) + .pipe("python " + SparkFiles.get(scriptName) + " " + args); + + JavaPairRDD, NullWritable> outputRecords = reducedRecords + .map(recordString -> AvroGsonFactory.create().fromJson(recordString, outputClass)) + .mapToPair(record -> new Tuple2, NullWritable>(new AvroKey<>(record), NullWritable.get())); + + return outputRecords; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java new file mode 100644 index 000000000..5889cf57a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java @@ -0,0 +1,31 @@ +package eu.dnetlib.dhp.common.string; + +/** + * Operations on {@link CharSequence} + * + * @author Łukasz Dumiszewski +*/ + +public final class CharSequenceUtils { + + + //------------------------ CONSTRUCTORS -------------------------- + + private CharSequenceUtils() { + throw new IllegalStateException("may not be initialized"); + } + + + //------------------------ LOGIC -------------------------- + + /** + * Converts the given {@link CharSequence} value to {@link String} by using {@link CharSequence#toString()}. + * Returns empty string if value is null. 
+ */ + public static String toStringWithNullToEmpty(CharSequence value) { + + return value == null? "": value.toString(); + + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java new file mode 100644 index 000000000..b69ed3419 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java @@ -0,0 +1,113 @@ +/* + * This file is part of CoAnSys project. + * Copyright (c) 2012-2015 ICM-UW + * + * CoAnSys is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * CoAnSys is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with CoAnSys. If not, see . + */ + +package eu.dnetlib.dhp.common.string; + +import java.text.Normalizer; +import java.util.HashMap; +import java.util.Map; + +/** + * Mapping to the basic Latin alphabet (a-z, A-Z). In most cases, a character is + * mapped to the closest visual form, rather than functional one, e.g.: "ö" is + * mapped to "o" rather than "oe", and "đ" is mapped to "d" rather than "dj" or + * "gj". Notable exceptions include: "ĸ" mapped to "q", "ß" mapped to "ss", and + * "Þ", "þ" mapped to "Y", "y". + * + *

Each character is processed as follows:
  1. the character is compatibility decomposed,
  2. all the combining marks are removed,
  3. the character is compatibility composed,
  4. additional "manual" substitutions are applied.

+ * + *

All the characters from the "Latin-1 Supplement" and "Latin Extended-A" + * Unicode blocks are mapped to the "Basic Latin" block. Characters from other + * alphabets are generally left intact, although the decomposable ones may be + * affected by the procedure.

+ * + * @author Lukasz Bolikowski (bolo@icm.edu.pl) + * + * @author Łukasz Dumiszewski /just copied from coansys-commons/ + * + */ +public final class DiacriticsRemover { + + private static final Character[] from = { + 'Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ', + 'ħ', 'ı', 'ĸ', 'Ł', 'ł', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ'}; + private static final String[] to = { + "AE", "D", "O", "Y", "ss", "ae", "d", "o", "y", "D", "d", "H", + "h", "i", "q", "L", "l", "N", "n", "OE", "oe", "T", "t"}; + + private static Map lookup = buildLookup(); + + + //------------------------ CONSTRUCTORS ------------------- + + + private DiacriticsRemover() {} + + + //------------------------ LOGIC -------------------------- + + + /** + * Removes diacritics from a text. + * + * @param text Text to process. + * @return Text without diacritics. + */ + public static String removeDiacritics(String text) { + if (text == null) { + return null; + } + + String tmp = Normalizer.normalize(text, Normalizer.Form.NFKD); + + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < tmp.length(); i++) { + Character ch = tmp.charAt(i); + if (Character.getType(ch) == Character.NON_SPACING_MARK) { + continue; + } + + if (lookup.containsKey(ch)) { + builder.append(lookup.get(ch)); + } else { + builder.append(ch); + } + } + + return builder.toString(); + } + + + //------------------------ PRIVATE -------------------------- + + private static Map buildLookup() { + if (from.length != to.length) { + throw new IllegalStateException(); + } + + Map _lookup = new HashMap(); + for (int i = 0; i < from.length; i++) { + _lookup.put(from[i], to[i]); + } + + return _lookup; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java new file mode 100644 index 000000000..bae64ae38 --- /dev/null +++ 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java @@ -0,0 +1,130 @@ +/* + * This file is part of CoAnSys project. + * Copyright (c) 2012-2015 ICM-UW + * + * CoAnSys is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * CoAnSys is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with CoAnSys. If not, see . + */ +package eu.dnetlib.dhp.common.string; + +import java.io.Serializable; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; + +import com.google.common.collect.ImmutableList; + +/** + * An implementation of {@link StringNormalizer} that normalizes strings for non-strict comparisons + * in which one does not care about characters other than letters and digits or about differently written diacritics. 
+ * + * @author Łukasz Dumiszewski + * + */ +public final class LenientComparisonStringNormalizer implements StringNormalizer, Serializable { + + + private static final long serialVersionUID = 1L; + + + private List whitelistCharacters; + + + //------------------------ CONSTRUCTORS -------------------------- + + public LenientComparisonStringNormalizer() { + this(ImmutableList.of()); + } + + /** + * @param whitelistCharacters - non alphanumeric characters that will not be removed + * during normalization + */ + public LenientComparisonStringNormalizer(List whitelistCharacters) { + this.whitelistCharacters = whitelistCharacters; + } + + + //------------------------ LOGIC -------------------------- + + + + /** + * Normalizes the given value.
+ * The normalized strings are better suited for non-strict comparisons, in which one does NOT care about characters that are + * neither letters nor digits; about accidental spaces or different diacritics etc.

+ * This method:
  • Replaces all characters that are not letters or digits with spaces (except those on the whitelist characters list)
  • Replaces white spaces with spaces
  • Trims
  • Compacts multi-space gaps to one-space gaps
  • Removes diacritics
  • Changes characters to lower case
+ * Returns "" if the passed value is null or blank + * + * @param value the string to normalize + * @see DiacriticsRemover#removeDiacritics(String, boolean) + * + * + */ + public String normalize(String value) { + + if (StringUtils.isBlank(value)) { + + return ""; + + } + + + String result = value; + + result = DiacriticsRemover.removeDiacritics(result); + + result = removeNonLetterDigitCharacters(result); + + result = result.toLowerCase(); + + result = result.trim().replaceAll(" +", " "); + + return result; + } + + + + + //------------------------ PRIVATE -------------------------- + + + private String removeNonLetterDigitCharacters(final String value) { + + StringBuilder sb = new StringBuilder(); + + for (int i = 0; i < value.length(); ++i) { + + char c = value.charAt(i); + + if (Character.isLetterOrDigit(c) || whitelistCharacters.contains(c)) { + sb.append(c); + } else { + sb.append(' '); + } + } + + return sb.toString(); + } + + + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java new file mode 100644 index 000000000..6e28422bc --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java @@ -0,0 +1,16 @@ +package eu.dnetlib.dhp.common.string; + +/** + * String normalizer. + * + * @author Łukasz Dumiszewski + * + */ +public interface StringNormalizer { + + /** + * Normalizes the given string value. 
+ */ + String normalize(String value); + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java new file mode 100644 index 000000000..7fcc0506a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.common.utils; + +import java.lang.reflect.Type; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonParseException; + +/** + * Factory for gson object that supports serializing avro generated classes + * + * @author madryk + * + */ +public final class AvroGsonFactory { + + //------------------------ CONSTRUCTORS ------------------- + + + private AvroGsonFactory() {} + + + //------------------------ LOGIC -------------------------- + + public static Gson create() { + GsonBuilder builder = new GsonBuilder(); + + builder.registerTypeAdapter(CharSequence.class, new CharSequenceDeserializer()); + + return builder.create(); + } + + public static class CharSequenceDeserializer implements JsonDeserializer { + + @Override + public CharSequence deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) + throws JsonParseException { + return json.getAsString(); + } + + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java new file mode 100644 index 000000000..44dd218b5 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java @@ -0,0 +1,77 @@ +package eu.dnetlib.dhp.common.utils; + +import java.lang.reflect.Field; + +import org.apache.avro.Schema; + +/** + * + * @author Mateusz Kobos + * + */ +public final class AvroUtils { + + public final static String 
primitiveTypePrefix = "org.apache.avro.Schema.Type."; + + + //------------------------ CONSTRUCTORS ------------------- + + + private AvroUtils() {} + + + //------------------------ LOGIC -------------------------- + + + /** + * For a given name of a class generated from Avro schema return + * a JSON schema. + * + * Apart from a name of a class you can also give a name of one of enums + * defined in {@link org.apache.avro.Schema.Type}; in such case an + * appropriate primitive type will be returned. + * + * @param typeName fully qualified name of a class generated from Avro schema, + * e.g. {@code eu.dnetlib.dhp.common.avro.Person}, + * or a fully qualified name of enum defined by + * {@link org.apache.avro.Schema.Type}, + * e.g. {@link org.apache.avro.Schema.Type.STRING}. + * @return JSON string + */ + public static Schema toSchema(String typeName) { + Schema schema = null; + if(typeName.startsWith(primitiveTypePrefix)){ + String shortName = typeName.substring( + primitiveTypePrefix.length(), typeName.length()); + schema = getPrimitiveTypeSchema(shortName); + } else { + schema = getAvroClassSchema(typeName); + } + return schema; + } + + private static Schema getPrimitiveTypeSchema(String shortName){ + Schema.Type type = Schema.Type.valueOf(shortName); + return Schema.create(type); + } + + private static Schema getAvroClassSchema(String className){ + try { + Class avroClass = Class.forName(className); + Field f = avroClass.getDeclaredField("SCHEMA$"); + return (Schema) f.get(null); + } catch (ClassNotFoundException e) { + throw new RuntimeException( + "Class \""+className+"\" does not exist", e); + } catch (SecurityException e) { + throw new RuntimeException(e); + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } catch (IllegalArgumentException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + +} diff --git 
a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java new file mode 100644 index 000000000..152271ab7 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.common.utils; + +/** + * Byte array utility class. + * @author mhorst + * + */ +public final class ByteArrayUtils { + + //------------------------ CONSTRUCTORS ------------------- + + private ByteArrayUtils() {} + + //------------------------ LOGIC -------------------------- + + /** + * Does this byte array begin with match array content? + * @param source Byte array to examine + * @param match Byte array to locate in source + * @return true If the starting bytes are equal + */ + public static boolean startsWith(byte[] source, byte[] match) { + return startsWith(source, 0, match); + } + + /** + * Does this byte array begin with match array content? + * @param source Byte array to examine + * @param offset An offset into the source array + * @param match Byte array to locate in source + * @return true If the starting bytes are equal + */ + public static boolean startsWith(byte[] source, int offset, byte[] match) { + if (match.length > (source.length - offset)) { + return false; + } + for (int i = 0; i < match.length; i++) { + if (source[offset + i] != match[i]) { + return false; + } + } + return true; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java new file mode 100644 index 000000000..1e6e04149 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java @@ -0,0 +1,89 @@ +package eu.dnetlib.dhp.common.utils; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import 
java.security.InvalidParameterException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Ports; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.io.CloseableIterator; +import eu.dnetlib.dhp.common.java.io.DataStore; +import eu.dnetlib.dhp.common.java.io.FileSystemPath; +import eu.dnetlib.dhp.common.java.porttype.AnyPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; + +/** + * Simple process verifying whether given datastore is empty. + * @author mhorst + * + */ +public class EmptyDatastoreVerifierProcess implements Process { + + public static final String INPUT_PORT_NAME = "input"; + + public static final String DEFAULT_ENCODING = "UTF-8"; + + public static final String OUTPUT_PROPERTY_IS_EMPTY = "isEmpty"; + + /** + * Ports handled by this module. 
+ */ + private final Ports ports; + + + // ------------------------ CONSTRUCTORS -------------------------- + + public EmptyDatastoreVerifierProcess() { +// preparing ports + Map input = new HashMap(); + input.put(INPUT_PORT_NAME, new AnyPortType()); + Map output = Collections.emptyMap(); + ports = new Ports(input, output); + } + + @Override + public Map getInputPorts() { + return ports.getInput(); + } + + @Override + public Map getOutputPorts() { + return ports.getOutput(); + } + + @Override + public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { + if (!portBindings.getInput().containsKey(INPUT_PORT_NAME)) { + throw new InvalidParameterException("missing input port!"); + } + + try (CloseableIterator closeableIt = getIterator(conf, portBindings.getInput().get(INPUT_PORT_NAME))) { + File file = new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)); + Properties props = new Properties(); + props.setProperty(OUTPUT_PROPERTY_IS_EMPTY, Boolean.toString(!closeableIt.hasNext())); + try (OutputStream os = new FileOutputStream(file)) { + props.store(os, ""); + } + } + } + + /** + * Returns iterator over datastore. + */ + protected CloseableIterator getIterator(Configuration conf, Path path) throws IOException { + return DataStore.getReader(new FileSystemPath(FileSystem.get(conf), path)); + } + +} diff --git a/dhp-schemas/README.md b/dhp-schemas/README.md new file mode 100644 index 000000000..473ad4cf1 --- /dev/null +++ b/dhp-schemas/README.md @@ -0,0 +1,3 @@ +Description of the project +-------------------------- +This project defines **serialization schemas** of Avro data store files that are used to pass data between workflow nodes in the system. 
diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml new file mode 100644 index 000000000..2c6e18f27 --- /dev/null +++ b/dhp-schemas/pom.xml @@ -0,0 +1,62 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + + dhp-schemas + jar + + + + org.apache.avro + avro + + + + + + + + org.apache.avro + avro-maven-plugin + + + generate-sources + + schema + idl-protocol + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + generate-sources + + add-source + + + + ${project.build.directory}/generated-sources/avro/ + + + + + + + + + diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl new file mode 100644 index 000000000..3bce821a4 --- /dev/null +++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl @@ -0,0 +1,29 @@ +@namespace("eu.dnetlib.dhp.audit.schemas") +protocol DHP { + + record Cause { +// generic cause code, root exception class name when derived from exception + string code; +// cause message + union { null , string } message = null; + } + + record Fault { +// input object identifier + string inputObjectId; +// fault creation timestamp + long timestamp; +// generic fault code, root exception class name when derived from exception + string code; +// fault message + union { null , string } message = null; +// stack trace + union { null , string } stackTrace = null; +// fault causes, array is indexed with cause depth + union { null , array } causes = null; +// Other supplementary data related to specific type of fault. +// See parameters description in oozie workflow.xml documentation of modules +// that use this structure for information what exactly can be stored as supplementary data. 
+ union { null , map } supplementaryData = null; + } +} diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl new file mode 100644 index 000000000..99406b4f0 --- /dev/null +++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl @@ -0,0 +1,16 @@ +@namespace("eu.dnetlib.dhp.common.schemas") +protocol DHP{ + + enum ReportEntryType { + COUNTER, DURATION + } + + + record ReportEntry { + + string key; + ReportEntryType type; + string value; + + } +} diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl new file mode 100644 index 000000000..9ad5435fa --- /dev/null +++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl @@ -0,0 +1,21 @@ +@namespace("eu.dnetlib.dhp.importer.schemas") +protocol DHP { + + enum RecordFormat { + XML, JSON + } + + record ImportedRecord { + + // record identifier + string id; + + RecordFormat format; + + // format name (OAF, OAI_DC, Datacite, etc) for which there is a parser implementation + string formatName; + + // record body + string body; + } +} diff --git a/dhp-wf/dhp-wf-import/pom.xml b/dhp-wf/dhp-wf-import/pom.xml new file mode 100644 index 000000000..6bf4ba825 --- /dev/null +++ b/dhp-wf/dhp-wf-import/pom.xml @@ -0,0 +1,105 @@ + + + + eu.dnetlib.dhp + dhp-wf + 1.0.0-SNAPSHOT + + 4.0.0 + + dhp-wf-import + + + + + ${project.groupId} + dhp-common + ${project.version} + + + + ${project.groupId} + dhp-common + ${project.version} + test-jar + test + + + + ${project.groupId} + dhp-schemas + ${project.version} + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + org.apache.hadoop + hadoop-common + + + + com.googlecode.json-simple + json-simple + + + commons-cli + commons-cli + + + eu.dnetlib + dnet-objectstore-rmi + + + eu.dnetlib + cnr-rmi-api + + + eu.dnetlib + cnr-resultset-client + + + eu.dnetlib + 
dnet-openaireplus-mapping-utils + + + + org.springframework + spring-context + + + org.apache.cxf + cxf-rt-frontend-jaxws + + + com.google.code.gson + gson + + + + org.apache.spark + spark-core_2.10 + + + org.apache.spark + spark-sql_2.10 + + + com.databricks + spark-avro_2.10 + + + org.mongodb.spark + mongo-spark-connector_2.10 + + + + + + + diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java new file mode 100644 index 000000000..214d6691d --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java @@ -0,0 +1,29 @@ +package eu.dnetlib.dhp.wf.importer; + +import java.io.IOException; + +import org.apache.avro.file.DataFileWriter; + +/** + * {@link DataFileWriter} based record receiver. + * @author mhorst + * + */ +public class DataFileRecordReceiver implements RecordReceiver { + + private final DataFileWriter writer; + + /** + * Default constructor. + * @param writer + */ + public DataFileRecordReceiver(DataFileWriter writer) { + this.writer = writer; + } + + @Override + public void receive(T object) throws IOException { + this.writer.append(object); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java new file mode 100644 index 000000000..955f18065 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java @@ -0,0 +1,50 @@ +package eu.dnetlib.dhp.wf.importer; + +import java.io.IOException; + +import org.apache.avro.file.DataFileWriter; + +/** + * {@link DataFileWriter} based record receiver with counter of + * received records. 
+ * + * @author madryk + */ +public class DataFileRecordReceiverWithCounter extends DataFileRecordReceiver { + + private long receivedCount = 0L; + + + //------------------------ CONSTRUCTORS -------------------------- + + /** + * Default constructor + * + * @param writer - writer of the received records + */ + public DataFileRecordReceiverWithCounter(DataFileWriter writer) { + super(writer); + } + + + //------------------------ GETTERS -------------------------- + + /** + * Returns number of received records + */ + public long getReceivedCount() { + return receivedCount; + } + + + //------------------------ LOGIC -------------------------- + + /** + * Receives passed record and increments the counter. + */ + @Override + public void receive(T record) throws IOException { + super.receive(record); + ++receivedCount; + } +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java new file mode 100644 index 000000000..40f673ee0 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java @@ -0,0 +1,52 @@ +package eu.dnetlib.dhp.wf.importer; + +/** + * Import realated workflow parameters. 
+ * @author mhorst + * + */ +public final class ImportWorkflowRuntimeParameters { + + // parameter names + + public static final String IMPORT_INFERENCE_PROVENANCE_BLACKLIST = "import.inference.provenance.blacklist"; + public static final String IMPORT_SKIP_DELETED_BY_INFERENCE = "import.skip.deleted.by.inference"; + public static final String IMPORT_TRUST_LEVEL_THRESHOLD = "import.trust.level.threshold"; + public static final String IMPORT_APPROVED_DATASOURCES_CSV = "import.approved.datasources.csv"; + public static final String IMPORT_APPROVED_COLUMNFAMILIES_CSV = "import.approved.columnfamilies.csv"; + public static final String IMPORT_MERGE_BODY_WITH_UPDATES = "import.merge.body.with.updates"; + public static final String IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV = "import.content.approved.objectstores.csv"; + public static final String IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV = "import.content.blacklisted.objectstores.csv"; + + public static final String IMPORT_CONTENT_OBJECT_STORE_LOC = "import.content.object.store.location"; + public static final String IMPORT_CONTENT_OBJECT_STORE_IDS_CSV = "import.content.object.store.ids.csv"; + public static final String IMPORT_CONTENT_MAX_FILE_SIZE_MB = "import.content.max.file.size.mb"; + public static final String IMPORT_CONTENT_CONNECTION_TIMEOUT = "import.content.connection.timeout"; + public static final String IMPORT_CONTENT_READ_TIMEOUT = "import.content.read.timeout"; + + public static final String IMPORT_MDSTORE_IDS_CSV = "import.mdstore.ids.csv"; + public static final String IMPORT_MDSTORE_SERVICE_LOCATION = "import.mdstore.service.location"; + public static final String IMPORT_MDSTORE_RECORD_MAXLENGTH = "import.mdstore.record.maxlength"; + + public static final String IMPORT_ISLOOKUP_SERVICE_LOCATION = "import.islookup.service.location"; + public static final String IMPORT_VOCABULARY_CODE = "import.vocabulary.code"; + public static final String IMPORT_VOCABULARY_OUTPUT_FILENAME = 
"import.vocabulary.output.filename"; + + public static final String IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT = "import.resultset.client.read.timeout"; + public static final String IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT = "import.resultset.client.connection.timeout"; + public static final String IMPORT_RESULT_SET_PAGESIZE = "import.resultset.pagesize"; + + + public static final String HBASE_ENCODING = "hbase.table.encoding"; + + public static final String IMPORT_FACADE_FACTORY_CLASS = "import.facade.factory.classname"; + + // default values + + public static final String RESULTSET_READ_TIMEOUT_DEFAULT_VALUE = "60000"; + public static final String RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000"; + public static final String RESULTSET_PAGESIZE_DEFAULT_VALUE = "100"; + + private ImportWorkflowRuntimeParameters() {} + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java new file mode 100644 index 000000000..c0a5e8950 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java @@ -0,0 +1,14 @@ +package eu.dnetlib.dhp.wf.importer; + +import java.io.IOException; + +/** + * Record receiver interface. 
+ * @author mhorst + * + * @param + */ +public interface RecordReceiver { + + void receive(T object) throws IOException; +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java new file mode 100644 index 000000000..0a3cd6fb4 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java @@ -0,0 +1,104 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import java.util.Map; + +import javax.xml.ws.BindingProvider; +import javax.xml.ws.wsaddressing.W3CEndpointReferenceBuilder; + +import org.apache.log4j.Logger; + +import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; + +/** + * Abstract class utilized by all WebService facades. + * @author mhorst + * + */ +public abstract class AbstractResultSetAwareWebServiceFacade { + + private final Logger log = Logger.getLogger(this.getClass()); + + /** + * Web service. + */ + private final T service; + + /** + * ResultSet read timeout. + */ + private final long resultSetReadTimeout; + + /** + * ResultSet connection timeout. + */ + private final long resultSetConnectionTimeout; + + /** + * ResultSet page size. + */ + private final int resultSetPageSize; + + + //------------------------ CONSTRUCTORS ------------------- + + /** + * Instantiates underlying service. 
+ * @param clazz webservice class + * @param serviceLocation webservice location + * @param serviceReadTimeout service read timeout + * @param serviceConnectionTimeout service connection timeout + * @param resultSetReadTimeout resultset read timeout + * @param resultSetConnectionTimeout resultset connection timeout + * @param resultSetPageSize resultset page size + */ + protected AbstractResultSetAwareWebServiceFacade(Class clazz, String serviceLocation, + long serviceReadTimeout, long serviceConnectionTimeout, + long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { + W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); + eprBuilder.address(serviceLocation); + eprBuilder.build(); + this.service = new JaxwsServiceResolverImpl().getService(clazz, eprBuilder.build()); + if (this.service instanceof BindingProvider) { + log.info(String.format("setting timeouts for %s: read timeout (%s) and connect timeout (%s)", + BindingProvider.class, serviceReadTimeout, serviceConnectionTimeout)); + final Map requestContext = ((BindingProvider) service).getRequestContext(); + + // can't be sure about which will be used. Set them all. 
+ requestContext.put("com.sun.xml.internal.ws.request.timeout", serviceReadTimeout); + requestContext.put("com.sun.xml.internal.ws.connect.timeout", serviceConnectionTimeout); + + requestContext.put("com.sun.xml.ws.request.timeout", serviceReadTimeout); + requestContext.put("com.sun.xml.ws.connect.timeout", serviceConnectionTimeout); + + requestContext.put("javax.xml.ws.client.receiveTimeout", serviceReadTimeout); + requestContext.put("javax.xml.ws.client.connectionTimeout", serviceConnectionTimeout); + } + + this.resultSetReadTimeout = resultSetReadTimeout; + this.resultSetConnectionTimeout = resultSetConnectionTimeout; + this.resultSetPageSize = resultSetPageSize; + } + + + //------------------------ GETTERS ------------------------- + + public T getService() { + return service; + } + + + public long getResultSetReadTimeout() { + return resultSetReadTimeout; + } + + + public long getResultSetConnectionTimeout() { + return resultSetConnectionTimeout; + } + + + public int getResultSetPageSize() { + return resultSetPageSize; + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java new file mode 100644 index 000000000..c156ae1cc --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java @@ -0,0 +1,17 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +/** + * ISLookup service facade. 
+ * + * @author mhorst + * + */ +public interface ISLookupFacade { + + /** + * Provides all profiles matching given query + * @param xPathQuery XPath query + */ + Iterable searchProfile(String xPathQuery) throws ServiceFacadeException; + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java new file mode 100644 index 000000000..f50b02b98 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java @@ -0,0 +1,17 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +/** + * MDStore service facade. + * + * @author mhorst + * + */ +public interface MDStoreFacade { + + /** + * Delivers all records for given MDStore identifier + * @param mdStoreId MDStore identifier + */ + Iterable deliverMDRecords(String mdStoreId) throws ServiceFacadeException; + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java new file mode 100644 index 000000000..0e1aa19ef --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java @@ -0,0 +1,19 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +/** + * ObjectStore service facade. + * + * @author mhorst + * + */ +public interface ObjectStoreFacade { + + /** + * Returns metadata records from given objectstore created in specified time range. 
+ * @param objectStoreId object store identifier + * @param from from time in millis + * @param until until time in millis + */ + Iterable deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException; + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java new file mode 100644 index 000000000..9776306fa --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java @@ -0,0 +1,27 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +/** + * Service facade generic exception. + * + * @author mhorst + * + */ +public class ServiceFacadeException extends Exception { + + private static final long serialVersionUID = 0L; + + //------------------------ CONSTRUCTORS ------------------- + + public ServiceFacadeException(String message, Throwable cause) { + super(message, cause); + } + + public ServiceFacadeException(String message) { + super(message); + } + + public ServiceFacadeException(Throwable cause) { + super(cause); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java new file mode 100644 index 000000000..94b9307c4 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java @@ -0,0 +1,20 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import java.util.Map; + +/** + * Generic service facade factory. All implementations must be instantiable with no-argument construtor. + * + * @author mhorst + * + */ +public interface ServiceFacadeFactory { + + /** + * Creates service of given type configured with parameters. 
+ * + * @param parameters service configuration + * + */ + T instantiate(Map parameters); +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java new file mode 100644 index 000000000..53a76d761 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java @@ -0,0 +1,80 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_FACADE_FACTORY_CLASS; + +import java.lang.reflect.Constructor; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import com.google.common.collect.ImmutableMap; + +import eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters; + +/** + * Service facade utility methods. + * @author mhorst + * + */ +public final class ServiceFacadeUtils { + + //------------------------ CONSTRUCTORS ------------------- + + private ServiceFacadeUtils() {} + + //------------------------ LOGIC -------------------------- + + /** + * Instantiates service based on provided parameters. + * + * Service factory class name is mandatory and has to be provided as {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} parameter. + * Other parameters will be used by factory itself. Factory must be instantiable with no-argument construtor. 
+ * + * @param parameters set of parameters required for service instantiation + * + */ + public static T instantiate(Map parameters) throws ServiceFacadeException { + String serviceFactoryClassName = parameters.get(IMPORT_FACADE_FACTORY_CLASS); + if (StringUtils.isBlank(serviceFactoryClassName)) { + throw new ServiceFacadeException("unknown service facade factory, no " + IMPORT_FACADE_FACTORY_CLASS + " parameter provided!"); + } + try { + Class clazz = Class.forName(serviceFactoryClassName); + Constructor constructor = clazz.getConstructor(); + @SuppressWarnings("unchecked") + ServiceFacadeFactory serviceFactory = (ServiceFacadeFactory) constructor.newInstance(); + return serviceFactory.instantiate(parameters); + } catch (Exception e) { + throw new ServiceFacadeException("exception occurred while instantiating service by facade factory: " + IMPORT_FACADE_FACTORY_CLASS, e); + } + + } + + /** + * Instantiates service based on provided configuration. + * + * Service factory class name is mandatory and has to be provided as {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} configuration entry. + * Other parameters will be used by factory itself. Factory must be instantiable with no-argument construtor. + * + * @param config set of configuration entries required for service instantiation + */ + public static T instantiate(Configuration config) throws ServiceFacadeException { + return instantiate(buildParameters(config)); + } + + + // ------------------------ PRIVATE -------------------------- + + /** + * Converts configuration entries into plain map. 
+ */ + private static Map buildParameters(Configuration config) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (Map.Entry entry : config) { + builder.put(entry); + } + return builder.build(); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java new file mode 100644 index 000000000..7c787f2f8 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java @@ -0,0 +1,55 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import java.util.Collections; + +import org.apache.log4j.Logger; + +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +/** + * WebService based database facade. + * + * @author mhorst + * + */ +public class WebServiceISLookupFacade extends AbstractResultSetAwareWebServiceFacade implements ISLookupFacade { + + private static final Logger log = Logger.getLogger(WebServiceISLookupFacade.class); + + + //------------------------ CONSTRUCTORS ------------------- + + /** + * @param serviceLocation database service location + * @param serviceReadTimeout service read timeout + * @param serviceConnectionTimeout service connection timeout + * @param resultSetReadTimeout result set providing database results read timeout + * @param resultSetConnectionTimeout result set connection timeout + * @param resultSetPageSize result set data chunk size + */ + public WebServiceISLookupFacade(String serviceLocation, + long serviceReadTimeout, long serviceConnectionTimeout, + long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { + super(ISLookUpService.class, serviceLocation, + serviceReadTimeout, serviceConnectionTimeout, + resultSetReadTimeout, 
resultSetConnectionTimeout, resultSetPageSize); + } + + //------------------------ LOGIC -------------------------- + + @Override + public Iterable searchProfile(String xPathQuery) throws ServiceFacadeException { + try { + return getService().quickSearchProfile(xPathQuery); + } catch (ISLookUpDocumentNotFoundException e) { + log.error("unable to find profile for query: " + xPathQuery, e); + return Collections.emptyList(); + } catch (ISLookUpException e) { + throw new ServiceFacadeException("searching profiles in ISLookup failed with query '" + xPathQuery + "'", e); + } + + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java new file mode 100644 index 000000000..6557ead94 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; +import static 
eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; + +import java.util.Map; + +import com.google.common.base.Preconditions; + +import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; + +/** + * WebService Database service facade factory. + * + * @author mhorst + * + */ +public class WebServiceISLookupFacadeFactory implements ServiceFacadeFactory { + + + //------------------------ LOGIC -------------------------- + + @Override + public ISLookupFacade instantiate(Map parameters) { + Preconditions.checkArgument(parameters.containsKey(IMPORT_ISLOOKUP_SERVICE_LOCATION), + "unknown ISLookup service location: no parameter provided: '%s'", IMPORT_ISLOOKUP_SERVICE_LOCATION); + + return new WebServiceISLookupFacade(parameters.get(IMPORT_ISLOOKUP_SERVICE_LOCATION), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java 
b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java new file mode 100644 index 000000000..d37d020ed --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java @@ -0,0 +1,52 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import javax.xml.ws.wsaddressing.W3CEndpointReference; + +import eu.dnetlib.data.mdstore.MDStoreService; +import eu.dnetlib.data.mdstore.MDStoreServiceException; +import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; +import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; + +/** + * WebService based MDStore facade. + * + * @author mhorst + * + */ +public class WebServiceMDStoreFacade extends AbstractResultSetAwareWebServiceFacade implements MDStoreFacade { + + + //------------------------ CONSTRUCTORS ------------------- + + /** + * @param serviceLocation MDStore webservice location + * @param serviceReadTimeout service read timeout + * @param serviceConnectionTimeout service connection timeout + * @param resultSetReadTimeout resultset read timeout + * @param resultSetConnectionTimeout result set connection timeout + * @param resultSetPageSize resultset page size + */ + public WebServiceMDStoreFacade(String serviceLocation, + long serviceReadTimeout, long serviceConnectionTimeout, + long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { + super(MDStoreService.class, serviceLocation, + serviceReadTimeout, serviceConnectionTimeout, + resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize); + } + + //------------------------ LOGIC -------------------------- + + @Override + public Iterable deliverMDRecords(String mdStoreId) throws ServiceFacadeException { + try { + W3CEndpointReference eprResult = getService().deliverMDRecords(mdStoreId, null, null, null); + ResultSetClientFactory rsFactory = new ResultSetClientFactory( + getResultSetPageSize(), getResultSetReadTimeout(), 
getResultSetConnectionTimeout()); + rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); + return rsFactory.getClient(eprResult); + } catch (MDStoreServiceException e) { + throw new ServiceFacadeException("delivering records for md store " + mdStoreId + " failed!", e); + } + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java new file mode 100644 index 000000000..00bb0c3f7 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_SERVICE_LOCATION; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; + +import java.util.Map; + +import 
com.google.common.base.Preconditions; + +import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; + +/** + * WebService MDStore service facade factory. + * + * @author mhorst + * + */ +public class WebServiceMDStoreFacadeFactory implements ServiceFacadeFactory { + + + //------------------------ LOGIC -------------------------- + + @Override + public WebServiceMDStoreFacade instantiate(Map parameters) { + Preconditions.checkArgument(parameters.containsKey(IMPORT_MDSTORE_SERVICE_LOCATION), + "unknown MDStore service location: no parameter provided: '%s'", IMPORT_MDSTORE_SERVICE_LOCATION); + + return new WebServiceMDStoreFacade(parameters.get(IMPORT_MDSTORE_SERVICE_LOCATION), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java new file mode 100644 index 000000000..6e1aee80b --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java @@ -0,0 +1,52 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import javax.xml.ws.wsaddressing.W3CEndpointReference; + +import 
eu.dnetlib.data.objectstore.rmi.ObjectStoreService; +import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException; +import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; +import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; + +/** + * WebService based ObjectStore facade. + * + * @author mhorst + * + */ +public class WebServiceObjectStoreFacade extends AbstractResultSetAwareWebServiceFacade implements ObjectStoreFacade { + + + //------------------------ CONSTRUCTORS ------------------- + + /** + * @param serviceLocation ObjectStore webservice location + * @param serviceReadTimeout service read timeout + * @param serviceConnectionTimeout service connection timeout + * @param resultSetReadTimeout resultset read timeout + * @param resultSetConnectionTimeout result set connection timeout + * @param resultSetPageSize resultset page size + */ + public WebServiceObjectStoreFacade(String serviceLocation, + long serviceReadTimeout, long serviceConnectionTimeout, + long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { + super(ObjectStoreService.class, serviceLocation, + serviceReadTimeout, serviceConnectionTimeout, + resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize); + } + + //------------------------ LOGIC -------------------------- + + @Override + public Iterable deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException { + try { + W3CEndpointReference eprResult = getService().deliverObjects(objectStoreId, from, until); + ResultSetClientFactory rsFactory = new ResultSetClientFactory( + getResultSetPageSize(), getResultSetReadTimeout(), getResultSetConnectionTimeout()); + rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); + return rsFactory.getClient(eprResult); + } catch (ObjectStoreServiceException e) { + throw new ServiceFacadeException("delivering records for object store " + objectStoreId + " failed!", e); + } + } + +} diff --git 
a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java new file mode 100644 index 000000000..9c77c4546 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java @@ -0,0 +1,44 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; + +import java.util.Map; + +import com.google.common.base.Preconditions; + +import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; + +/** + * WebService ObjectStore facade factory. 
+ * + * @author mhorst + * + */ +public class WebServiceObjectStoreFacadeFactory implements ServiceFacadeFactory { + + //------------------------ LOGIC -------------------------- + + @Override + public WebServiceObjectStoreFacade instantiate(Map parameters) { + Preconditions.checkArgument(parameters.containsKey(IMPORT_CONTENT_OBJECT_STORE_LOC), + "unknown object store service location: no parameter provided: '%s'", IMPORT_CONTENT_OBJECT_STORE_LOC); + + return new WebServiceObjectStoreFacade(parameters.get(IMPORT_CONTENT_OBJECT_STORE_LOC), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java new file mode 100644 index 000000000..5d61f06a5 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java @@ -0,0 +1,94 @@ +package eu.dnetlib.dhp.wf.importer.mdrecord; + +import java.util.Stack; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import eu.dnetlib.dhp.common.InfoSpaceConstants; + +/** + * MDRecord handler extracting record identifier. 
+ * + * Notice: writer is not being closed by handler. Created outside, let it be closed outside as well. + * @author mhorst + * + */ +public class MDRecordHandler extends DefaultHandler { + + public static final String ELEM_OBJ_IDENTIFIER = "objIdentifier"; + + private static final String ELEM_HEADER = "header"; + + private Stack parents; + + private StringBuilder currentValue = new StringBuilder(); + + private String recordId; + + + // ------------------------ LOGIC -------------------------- + + @Override + public void startDocument() throws SAXException { + parents = new Stack(); + recordId = null; + } + + @Override + public void startElement(String uri, String localName, String qName, + Attributes attributes) throws SAXException { + if (this.recordId == null) { + if (isWithinElement(localName, ELEM_OBJ_IDENTIFIER, ELEM_HEADER)) { +// identifierType attribute is mandatory + this.currentValue = new StringBuilder(); + } + this.parents.push(localName); + } + } + + @Override + public void endElement(String uri, String localName, String qName) + throws SAXException { + if (this.recordId == null) { + this.parents.pop(); + if (isWithinElement(localName, ELEM_OBJ_IDENTIFIER, ELEM_HEADER)) { + this.recordId = InfoSpaceConstants.ROW_PREFIX_RESULT + this.currentValue.toString().trim(); + } +// resetting current value; + this.currentValue = null; + } + } + + @Override + public void endDocument() throws SAXException { + parents.clear(); + parents = null; + } + + @Override + public void characters(char[] ch, int start, int length) + throws SAXException { + if (this.currentValue!=null) { + this.currentValue.append(ch, start, length); + } + } + + /** + * @return record identifier + */ + public String getRecordId() { + return recordId; + } + + // ------------------------ PRIVATE -------------------------- + + private boolean isWithinElement(String localName, String expectedElement, String expectedParent) { + return localName.equals(expectedElement) && !this.parents.isEmpty() && 
+ expectedParent.equals(this.parents.peek()); + } + + +} + diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java new file mode 100644 index 000000000..461093851 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java @@ -0,0 +1,157 @@ +package eu.dnetlib.dhp.wf.importer.mdrecord; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import com.google.common.base.Preconditions; +import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; +import eu.dnetlib.dhp.common.counter.NamedCounters; +import eu.dnetlib.dhp.common.counter.NamedCountersFileWriter; +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.io.DataStore; +import eu.dnetlib.dhp.common.java.io.FileSystemPath; +import eu.dnetlib.dhp.common.java.porttype.AvroPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import eu.dnetlib.dhp.importer.schemas.ImportedRecord; +import eu.dnetlib.dhp.importer.schemas.RecordFormat; +import eu.dnetlib.dhp.wf.importer.facade.MDStoreFacade; +import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeUtils; +import org.apache.avro.file.DataFileWriter; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.log4j.Logger; +import org.xml.sax.InputSource; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_IDS_CSV; +import static 
eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_RECORD_MAXLENGTH; + +/** + * {@link MDStoreFacade} based metadata records importer. + * @author mhorst + * + */ +public class MDStoreRecordsImporter implements Process { + + protected static final String COUNTER_NAME_TOTAL = "TOTAL"; + + protected static final String COUNTER_NAME_SIZE_EXCEEDED = "SIZE_EXCEEDED"; + + protected static final String PORT_OUT_MDRECORDS = "mdrecords"; + + private static final Logger log = Logger.getLogger(MDStoreRecordsImporter.class); + + private final static int progressLogInterval = 100000; + + private final NamedCountersFileWriter countersWriter = new NamedCountersFileWriter(); + + private final Map outputPorts = new HashMap(); + + + //------------------------ CONSTRUCTORS ------------------- + + public MDStoreRecordsImporter() { + outputPorts.put(PORT_OUT_MDRECORDS, new AvroPortType(ImportedRecord.SCHEMA$)); + } + + //------------------------ LOGIC -------------------------- + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return outputPorts; + } + + @Override + public void run(PortBindings portBindings, Configuration conf, + Map parameters) throws Exception { + + Preconditions.checkArgument(parameters.containsKey(IMPORT_MDSTORE_IDS_CSV), + "unknown mdstore identifier, required parameter '%s' is missing!", IMPORT_MDSTORE_IDS_CSV); + String mdStoreIdsCSV = parameters.get(IMPORT_MDSTORE_IDS_CSV); + int recordMaxLength = parameters.containsKey(IMPORT_MDSTORE_RECORD_MAXLENGTH)? 
+ Integer.parseInt(parameters.get(IMPORT_MDSTORE_RECORD_MAXLENGTH)):Integer.MAX_VALUE; + + NamedCounters counters = new NamedCounters(new String[] { COUNTER_NAME_TOTAL, COUNTER_NAME_SIZE_EXCEEDED }); + + if (StringUtils.isNotBlank(mdStoreIdsCSV) && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(mdStoreIdsCSV)) { + + String[] mdStoreIds = StringUtils.split(mdStoreIdsCSV, WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER); + + try (DataFileWriter recordWriter = getWriter(FileSystem.get(conf), portBindings)) { + + MDStoreFacade mdStoreFacade = ServiceFacadeUtils.instantiate(parameters); + + SAXParserFactory parserFactory = SAXParserFactory.newInstance(); + parserFactory.setNamespaceAware(true); + SAXParser saxParser = parserFactory.newSAXParser(); + MDRecordHandler mdRecordHandler = new MDRecordHandler(); + + long startTime = System.currentTimeMillis(); + int currentCount = 0; + + for (String mdStoreId : mdStoreIds) { + for (String mdRecord : mdStoreFacade.deliverMDRecords(mdStoreId)) { + if (!StringUtils.isEmpty(mdRecord)) { + if (mdRecord.length() <= recordMaxLength) { + saxParser.parse(new InputSource(new StringReader(mdRecord)), mdRecordHandler); + String recordId = mdRecordHandler.getRecordId(); + if (StringUtils.isNotBlank(recordId)) { + recordWriter.append( + ImportedRecord.newBuilder() + .setId(recordId) + .setBody(mdRecord) + .setFormat(RecordFormat.XML) + .build()); + counters.increment(COUNTER_NAME_TOTAL); + } else { + log.error("skipping, unable to extract identifier from record: " + mdRecord); + } + } else { + counters.increment(COUNTER_NAME_SIZE_EXCEEDED); + log.error("mdstore record maximum length (" + recordMaxLength + "): was exceeded: " + + mdRecord.length() + ", record content:\n" + mdRecord); + } + + } else { + log.error("got empty metadata record from mdstore: " + mdStoreId); + } + currentCount++; + if (currentCount % progressLogInterval == 0) { + log.info("current progress: " + currentCount + ", last package of " + progressLogInterval + 
+ " processed in " + ((System.currentTimeMillis() - startTime) / 1000) + " secs"); + startTime = System.currentTimeMillis(); + } + } + } + log.info("total number of processed records: " + currentCount); + } + } + + if (counters.currentValue(COUNTER_NAME_TOTAL)==0) { + log.warn("parsed 0 metadata records from mdstores: " + mdStoreIdsCSV); + } + countersWriter.writeCounters(counters, System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)); + + } + + /** + * Provides {@link ImportedRecord} writer consuming records. + */ + protected DataFileWriter getWriter(FileSystem fs, PortBindings portBindings) throws IOException { + return DataStore.create( + new FileSystemPath(fs, portBindings.getOutput().get(PORT_OUT_MDRECORDS)), ImportedRecord.SCHEMA$); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java new file mode 100644 index 000000000..4776af22b --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java @@ -0,0 +1,48 @@ +package eu.dnetlib.dhp.wf.importer.mdrecord; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import org.apache.hadoop.conf.Configuration; + +public class MongoRecordImporter implements Process { + + private final Map outputPorts = new HashMap(); + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return outputPorts; + } + + @Override + public void run(final PortBindings portBindings, final Configuration conf, final Map parameters) throws Exception { + + /* + SparkSession spark = SparkSession.builder() + .master("local") + .appName("MongoSparkConnectorIntro") + 
.config("spark.mongodb.input.uri", "mongodb://127.0.0.1/test.myCollection") + .config("spark.mongodb.output.uri", "mongodb://127.0.0.1/test.myCollection") + .getOrCreate(); + + // Create a JavaSparkContext using the SparkSession's SparkContext object + JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); + + // More application logic would go here... + + jsc.close(); + */ + + } + + +} diff --git a/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml b/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml new file mode 100644 index 000000000..dee61af91 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml @@ -0,0 +1,124 @@ + + + + + mdstore_facade_factory_classname + eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory + ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade + + + mdstore_service_location + $UNDEFINED$ + mdstore service location + + + mdstore_ids_csv + $UNDEFINED$ + comma separated mdstore identifiers + + + mdstore_record_maxlength + 500000 + maximum allowed length of mdstore record + + + output + ImportedRecord avro datastore output holding mdrecords + + + output_report_root_path + base directory for storing reports + + + output_report_relative_path + import_mdrecord + directory for storing report (relative to output_report_root_path) + + + dnet_service_client_read_timeout + 60000 + DNet service client reading timeout (expressed in milliseconds) + + + dnet_service_client_connection_timeout + 60000 + DNet service client connection timeout (expressed in milliseconds) + + + resultset_client_read_timeout + 60000 + result set client reading timeout (expressed in milliseconds) + + + resultset_client_connection_timeout + 60000 + result set client connection timeout (expressed in milliseconds) + + + 
report_properties_prefix + import.mdrecord + report entry related to total number of imported records + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + + + + + + + + + + + eu.dnetlib.dhp.common.java.ProcessWrapper + eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter + + -Pimport.mdstore.service.location=${mdstore_service_location} + -Pimport.mdstore.ids.csv=${mdstore_ids_csv} + -Pimport.mdstore.record.maxlength=${mdstore_record_maxlength} + + -Pimport.resultset.client.read.timeout=${resultset_client_read_timeout} + -Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout} + -Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout} + -Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout} + + -Pimport.facade.factory.classname=${mdstore_facade_factory_classname} + -Omdrecords=${output} + + + + + + + + + eu.dnetlib.dhp.common.java.ProcessWrapper + eu.dnetlib.dhp.common.report.ReportGenerator + -Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']} + -Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']} + -Oreport=${output_report_root_path}/${output_report_relative_path} + + + + + + + Unfortunately, the process failed -- error message: + [${wf:errorMessage(wf:lastErrorNode())}] + + + diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class new file mode 100644 index 0000000000000000000000000000000000000000..5bd02d36034998ac1745f42ba3c408b55f779607 GIT binary patch literal 1161 zcmb7D+iuf95IviuF*czsX%nE~)|P^k2v!Il5Q*}_MM8?G)JBDuK5U$AvXyIFTPKCj zLgFIvzz6VAh*{@SOr$D(@XpT6&Y3yKyTAYZ`~~1S>JFB0-$o9b4z{pu;emx63lACc 
zXIjNlGwifO-RB|_-av9O(jh-mzT~e(EZ)-QLvpOaVDGDbAmX8x47EjSnw?IgMTPmM z3RK);aNR}98r1`a+^euheOB5kkne{lU8y^w>r|&uk+td7W#G`qh;D)GHHvh} zX2?3VA7HimH)Ot$m&G!z%VdFd!YWa987t(K$EUH1>*P`01ihHbt<7@Itn)hUN>ras z`W{aD+O@C9)W0Kp-qwr=Y|zk^e_GnJmNstSCV`!|G`Vq0le|oaT8bRZscxK_$VHb zKuA3B0elqVcylRL`w-|p*fXB-%sFSgzkdJt3E(-(Ii&G0ixgI!wU)tp29E^ht@gDU z>UKRG1d)JvCooqvfr)AYp4SL%N2J5!OYa(qo%ew-pdf3(i z3k?(Kt-)bS+oo#uc(T|Cebw7l);NEBn2NgQNTAlx1KAFA)H5yF?)K&JzBGsZ(6Z{Z z$$RaGw$0VSw0kD%#!%UpNRML#0&6n?6)2rX^Qk{URKF0EYO}dnBc94EFeb^_9n%R^ zG_X|Z$xKR*ZaTS(9UYA&nM^L}l{vf2c%A~wUgJOwmF%gYBez=z+K=K%%`t<5y2+f0 z3na-zAou#Cultb+gQE;Q>g@5@_1&sAU!1-6A9I|DklP6d*4OSe39L^Z7O9;-^QR7S z0hf?1U=EoAuH%M4^&-zmVB?>!3oOOjnNYsno@#~r{Jqlr-2@U29jzp7f~B9M=hIJc z&sQFQhp1$~A@L^uZstgew#P literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class new file mode 100644 index 0000000000000000000000000000000000000000..13a46a67ad341470bf894e4ec54f0dfb22761bd7 GIT binary patch literal 2706 zcmbW3YnRhR7{{N11tM}25nVu7U=i5W=`JEFAetuA-LPp=leDmSiD@&t4VxxCxh&$h z@|EZK!Vlnw^7tf8H?uA3Irc>-zs$eSedhGfzkdG%0Pcf#5{`m;0@w*?5xAwbJErBh zn$cnwGZf}_EVIW9xp-{qva+RXwnN~&{?gm`8le~MHtZ}3qTNRXPAhSgW?q!_yfEhy zY!Fzg?Acpd$5olm9HzR>?b^Nsn*^>@D#z;Dj;pgC(_LrNV)mw~s}g7gu2l-TQ^upZ zs_e+N*|ijAyNcbD;34dx>b3}6t#oZ>ZD3ZWy6ryGoK3geL95J` zKqqj!(kQ0kFvD>V9GlIjOtBpkqnkKx6IhuM9~26cozok2HdEuJ#&Julu5Kz4m;`Rl z7(FD?)XIBj-dbCJ?zS|YxwiI#x$UL|-x9b!V+~y!OmQ^Ra2>79%&sGWMd0#`%wn=y z3v2=xD?wYGM0*ej4#l)d(>Ivnkum#ZzXgnz13l4|V-pvLdU@#RVq~ z)6!)d=kiAa%Mf6rjzvP1qCub5g4Wb>M@Mys4IEnXpQT9L5jMiN|){h zW9st|6Y**Pb^SZM{whh9@+UFQIiGH%JlPMh-*pnx@TQ7{58}k97-h(# z-E3G7M39DYyqAyZypgH@eXLDC5543y9h66HnwqeI>5TKNEJ1|F1P?_OJT@y2vy^9a zfJ1=&Je=doKgr%yuEja=*xThr#R*&(_PvzTtT^fv4?^tYzkzyccf5h;bBYVj59Sei zX*vn?*n>6v;AOJDwg&5P1pkKsT!uwhfMbA{4bl-ffh-C97V=4f-$p(q@M+{T0-r@b 
zC-6JS=LLQj`GUajAzu{uedG@WUPQhma1rknfj>mPD)2SrC4sLa-w^mF^0L6UkUtXm zHu4>T?;@`Vyo&s>z@H#ffg8wc0&v! zhR~hcb(w|QiziOfv^h#1(}#2W}C% literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class new file mode 100644 index 0000000000000000000000000000000000000000..abb39ad860b7920540735e30f0c2d958b3c9e53d GIT binary patch literal 3431 zcmb_e+jA3D82=rTbdz+`(kqmk0j;DdW|50PTMDJzS^}-4v{t;ZNlw!(o87RxX-mC} z*BPID^UXg%N1d?+WN`G!8OJA|eA2(e7xg=vZMtbHQ${nHv*&!@cfRxcuIK#x*LU9o zID(I(XvVl0o{!=xG7;>-goJBRgakQ+Nij@Gcp-|2phPe&WL}hTJ&czmyd1?Vcr}U} zP=$hQBVx#fk#7XYOc;eQX2a0Jm-Z0z+5YvS*lMOTf2GM1g=IfvMhj8-sIr)-fxU}AzO?H0Gox|89~ zRj%eIv?4dl4g(uy*imH-n}$IOG}EXGv{aenYJq38CDO=77#apNLvschdJ{eX!@6qQ z{XRm*u{5L5KT5xmo=Jw_u$da|JGipxFb7d%N zJC>Rw2Y8+sT2L)M#j}};FVDMTq%)M$)uknd7M~#%%Lswz_%y*4j;alSXD$&vu^MM@ z_lO&~Im%1cnX;zmxy5kIJ$F%AEb7XFO@B3?r!d&$NoDHr@Hr!2GBtzjnBf*Ta@^ZU z$U6nBH80*St$Olkt>XrKmU)mYF5;QERg7-3fb-F zZxFq9KYy)GXfstxVql`L{_w$i5<@g&maQB=uZfE2-CVw=MET1|!jfUbkx-T~j|CY| zAkJ`9a34F9WGv#gj3vCraL89!+@RtVS?HLiZY!he;)3n@nA~UT^V}LKmUJ2C@Vbn> z*vGJ|2EZB3$kYCD%beHTR3Emv;}9k8R63F!pE0d?x7`=Fs7JElIe%jmmG>~sC)Vx#>zUE_vB$Z}kp=hdd%y^)ZoYOQ(8 z=*H7B_KP8b9)^p)Q2wVLNo{b=<8Bex#Gq1&bn_%c%MVj=crtA@Ww_c>xBbgCN7oo_ ztz3Bw2kKftf6t>XNUvuyT=H-9xYheQWZo@TBPK)r=W> ztaSKQc&$~$y~S;cwfA?+Y6AFf@R$pJl|Wfj5)YH(8)$trF7N2zUV26~(>pGNR?!1# z)+$;N7@nm07Cc33Dh!uSwc&oLJm?!jkossZ$s7RKpODZ*)IXAU!IH@pY`KTPGJ+2f zn!byMKDkRiumWkSOI|^ED)|5rB1e1Ox#9u3!(ey2w;j`Zn>ma!|@K_GX3Lna3C42_fMpz%1Z+Q@+?upNEmzhTk!c6ltA7l;o$N;2^z)$*Fi2hv+M2 zhv~`9+}Tk)N6r_tV-%QxJ3CIZkUJAWliZmITg07-z%>%mp9o$C=! 
zIO!TGR|GuM1e7(308q)l3nX=sTtzL7XhWoaRqf6%*Qvd}DYf+a-B8`G_3CbHQr#t_ atLWB@-u=ZobbM3lws>Y=ri>8fH}((h0*LMa literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class new file mode 100644 index 0000000000000000000000000000000000000000..7689db9d69df5d0b1e2c8d834ac9e17251f61d08 GIT binary patch literal 378 zcma)&K~4fe5Ji8%$OsBC@e;V9S-3EPxRZp0M3X*2PfyLzFm_Lx9`tN3Jb;HXG!vt^ z(%r90)vN#Oc_)hC5bXub;dtOvQxpc(qY`B`Rf0A%i!zoZXzXmVv#> zDxMJr53ic*V4U5h7!vx@lvahtN6h@6nE46%>)QPhEnVX^FIr32OSy7x+tdrCRnC*6 zKf;t7ky{Rx$wa=c#Y-W~bM1WeCyI7Z{N5v6{Um$LHmn2T?r(}@zc_A amjES%E|OTC#VU#Y9@3~fIFGvDs=)!r)NWJ& literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class new file mode 100644 index 0000000000000000000000000000000000000000..ec0403005b2c247fe581bb9b5c9d772afe6673b6 GIT binary patch literal 379 zcma)&O-=(b427S|(4pl=h*Oje*|0!r#i~?7qCz4k$RzHNDkP&cGjKK*9DqY1(h-8P zg59%ae}2#JAFpo!E^$)gu*4DJ)UdN1$L0Kib%<<{Fur-w+$0CeQH9?_Qa|m6u;JT*q>bY)tYr8T>TA^96Otx^vLhPLjqxt bp9xSw*h7)4{ah8fKSY_;00&u*dNuw8CAn^F literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class new file mode 100644 index 0000000000000000000000000000000000000000..e9e7a2cbdf9d0dc04cd59e3b783f1d11be85610a GIT binary patch literal 389 zcma)&Jx;?w5QX0a6O#ZD6=whnwtxaDC>@cAB2nxK*7n3~w5+Yo+Hf`s4#1%zCJur~ z6rp?b_syI6T)%$+xWy>Naf%bds96=aCjSQA@4&w|PBw5|H6iDzbwWGe8q~20JTVM5-0cTx4y_eO9row+K7r7a aw*r(94vU6#hE#0&iUs5Gktw~egSZV?IhwT>rl~Q!@#D2Erzrwl;n0tC{Mc1ZR3K0T{A47 zyN>5`$M0}E5OG3;A4B3)I?_L5sO~lI`IwtFclu__SJLU%XDaQ1awoiNi~4?tA$IQe z1jA}mI^rf6c7T}#_BgNVJcaO=I 
z8z_<@wXe#2VTW2u3QI`okk_HWaQt7N3~OKBKHXy%F?wQvp^O9!S{EpbU=dN;)8!PG zsnVc4N~=+=9Xx?OhD|FJlhh2PX*h#b+U2{ay_4@ s)}9dgNnIytxsaOw==!84rc});%wt7=vWmni{lU_@=wC*WltfT^2Ub0y-v9sr literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class new file mode 100644 index 0000000000000000000000000000000000000000..18e211bf9c36ee6ecde15b23d2feda618147c488 GIT binary patch literal 347 zcmZutyHdhX5Iu`X1T1W=ZJ_Zs4h3N-t#mAsA6Sx0TqfLPE*lts&B71xqZ}`RA|n>N zv*+xdeSQCYegSyGZG=&TF<~rjr|eW%E#YDEvf(|OMx`{bT*uQBF4-04ZMCtP#)M%a zg=1}eP|W`*79ByNUR)7hGRt_HrlHYfk&<^^!d-iL=h6Xz&xoeZTS- zw3qe?jPxY=+o^zGa;D>YVc;xoO8bQ{l0n6nLqyh<#zyw@q>)12%0cmj|+{M z!9^J_MQ{n1Wn8Jp%Mr-98bKpok@2b+Uz4FlFbiEqq7f}HWF+g65_0JXI$(;25y2eh zBgi1z2n)7&5fRK>F$}45FWD=hJ=>5X&IA+?5yETYqJ@y zw8m{+%gktwDdxq+u$wmX5{|}=f|{@lH)GDKiF8g~OsZx!XFIOps7X!N5{5cuI18q3 zobqNDTr-m&lF(>cc~`SsQ*-HJcXxbFThP=3chw0kH`L?LGn$oB&(6*nx=Wo}J)_;Y zAz^)Bb1b9fJ$t%MgHAzrZAZe6fP`ZtGt^hd$^x|2oG(R1jq4Hy)|v_P5t_6mF@uc( zk0-C`M$R>Di+hor=4e@ia1gB)U#OI&giV!Uc-c%+UZCzF;vq2QI;NGPwf4$VT+c0+ zV?#jNGcp_-q5Q7E@+vs^LeD&5=EKv|L&C|C%B+S1K8t~`ToHOrQ?91Zv)W!uvGLi< z1gwM}0c)5tQO^67F@v|tafKngO-LWJID7>Wd$Y7tkGeWhMAU(1qoN54A&~Xu+gXNf=fC(rQmJ6Lt-rl?zOLicky0DK0aN@cwfN>_>c$J8uK~j2diKYx+Juf z{RI-$jAi1F+PYqF93v53OtZex@?l3FFX1qI{Q)QjABTg;`6C4$2iYGjRhz1 zmlw$gynmz&UPro$OddS*S0&!v0lM^gCa(sL$zrIonLokRdGc7$aPdUyx%`lIUWDYN z?f7T#p4Bu1XOP!xy z6pzE8lhVB@wn`d-@TSv_y(q|eNL&NEsw=UCgWWxABw(i48K~Vu&sz!gW~o`)>89K- z%S}X6a$3tz8rRr4yuY%XVauCrSe6g=5oK}(N}etvQTbu;wpm@$fupO|I|L=fszzZ5 z2Y5Gc;$@>%CCZpJRYE4~1Bqm#1+!9u7J%2FGR z)pBhj)>?BH;SlblHr{&&bvM!UE96V9^|#S*j|-8BzM9|AxCCW#A77f5(0m^)gW)aV z*j=m(;nsvF-r9E?ZTIM|1J7a<>uFvD(?zG^sbDYK(9PXm2G+-CKL(h=DE5<<4tT+i zF!Wu_ZZG9RlxRl}VhneHHhQTOreq`bQ8Oe0r}lo13T+&~L3%#KS&UM?jl(4yM@lx1 za@W^9#&N*Y4*iV|8Bg=iXb?kO3Dc5{VZMd%j0f3>51Hpf;9VcPgAI4E@s=l3hWxmf zg%5c>juQv0usDi{pkdmMa`z{WA|e`WkH?mZ+vASJl 
z7%A9JRX3W3S>rmbT-(tLcD2f#d4@zzSi)Uk7#%O{7*7nnZdg^l=sLox&acwn%ESYP zc-}5^hIBz#eBEo5xU*%H>a>z6*rri`U^qg~!^OB;6D@{ZfqQz{;%;4(^m47K?^Sfs zXxfg;9lc_hMw#mmd8x>qC&J__#e!|`dd($YLCDgrKp{Q2!nUNdV+d(6+;)!}HKPdeGr5ukIGPd*sw;d-*MA2HMI^qsoioCt}Z582Ve>aLn4K zV^@TX$oO00I1vH9LhNO%h#{lxi_su)i!>B?U9>1qG8($m5PA@r`^`JH={30JuG{XC z?OEl;rzUSoVr1+3|F9E1r#{^@+}d4_JI{zq(KXE7HKXY#l8TTDj6rShr=<24Mb$D~ z&mru&E~(}s^ux+pAjrv_No4)vi;c@FdlBW(pPoe2$df z2~iE}Z+>CE3-`yWR4v+`WAY_G_vc^ZIJ$9K7B0h;NJDgoDuof;OQDFZ6z+p!l<(d| z$8t&GEY2~UeY2Zxd!k0vk9Zw;bt@A)^dymN;9Ch)Q6 z?Rq`zkT#g?IZOP{m<84A-#R5KOO2b1L39vWd8Kw z^y3|zBIEtIOj69JHR^+s^J#kg=mcekv{z_0>(iVhKxrbG#T40?CW=|oOL7}+>Kpz# zqtkwv))mY=Qpm$nldJO~!hn>q6mgTEpnW6gX}2^v93>j@8j Kf}6g9PyYisSk4jv literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class new file mode 100644 index 0000000000000000000000000000000000000000..c9ac13238267ccac5aa5be2ce20381b16947c9ce GIT binary patch literal 2265 zcmbVOTUQ%Z6#h;UGQ%)j8ZGvsb+lZPpqW%#R6trpsnE2=rUs!ePLjiL%48;MF0k;y z-=r@Vb=lQtf0WBT7n%#9pz|?h|3g-xD6gYJ~@S z)8@V<>Uwjptsm~{qSba>pS$|5(J-1^-{SQucMn8^uU9LMvmdnAq8D`c9rtLFVMN%T zZ`i&te2TZ4uRJ#n3_b9Lr9U*2*y$mA2s;Elzn*n&W*Q876x{9kow@-RV_b zino?wn|jrEg>5dLmMl}e;i@og!w+0SAo>to(8>S8d9yDTRV%7ql=0*GbKdYNyPr=B zFHqFv*m{m@=>m3rhY^-XsXBq%;ArLcNV;4=)(m+4aR1XT%7ReTQ`4Ksu68jfufxu_%^>#3xAF82b< z_bAxFAjPxP2Kl^tO1W*A+!H?$kxRemO_!5>Q-p=4NjBQp za6agio8CEM^8JaN2-Ek$TioHNy(1*!Q~)&zyy^ zr>@!E;do-hFxHV(9NVPfNloJMc->Q=#H6snaH(UoZu{KiE`uD|(YCLyCua1OyEY)} z4Z79~HR8$@`nWP&rSEN;o-_gI{x0owdNO(r6<;9zD=kuZkM7w>cpW!rt;Py?9~$jg zNIcRCjbCDnR=|&Qxy0OK^l+2K1mLw_UCsqhkf%YM$GD$*A+2Y)9cqfc}RzBWrR{NB|vGCI!&9}7!bLYq$ae)K(PxX#gt3$vb8o!WyvGSK-1sT z_x1%QWv1?zvxf|M~BaKLI>MO+rq_C}w4h;AtNi<|KS7 z7N3b>UJTD<^dK)`L57S)u~-tr=Mt7B6eJX73}7*VFT}i(z^a(nWc1^?gclONl(5ba 
zpEWGQn`0PDue{XuG}Y9snp*N4!>Z+q^ebdG8De=`=M4QThQ-&K^)h!hw6aNzsmw43DjIP2Vu;+-`adtjI7LZscvt;uXQ6wLRo;w`qDM z?zK^kwa^`ClC8}bT8)>CS7baZFbwG2H1@c&y0FeGwxg5X=?%KA!)BceI^`LI5`!4-8T_>R~1w!3=kYBX)hHY4+a+N#Ws`%uNOa3Ca{mk$*>TPb3 z06`dN?Q&18mkHypH{Vop9qwQ407u%d8W5y)GWv5xT2JUcnZ&73|<^1>cBa4Bs-`@5IrwZPQhY+QGgX<^eg@HYxcF z^@b@ZzfQ#+-gj0!^{Rn;g+`fUX|xUi!%N^VzXc-cXNxVlm@k_lcc=?%)IebP^vD6!UR+ z35xsBM+7B&D2)v9B_ohUx39>*#8sT%9zR6)-(XqB*lWakuzHN19YV(Gljutvp?7Wa z81l{$`VOH?P8{OeZ*;1E=NJQoOb!kn;_WPbgF`eN;`+o9hX3+4e1wnj7D*I#-SX`i zC#|y>#sq0RiD^vH2^BL030oi1OHSA_L3>>^Cuv_L`Wdo$8uxr#w_Bdr#(m!t4~Q}j bwkHw&BZ6ZjyPx<9=g-g=K#%dn$9(cXh0YEt literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class new file mode 100644 index 0000000000000000000000000000000000000000..b7c9aaee6ce448619b2e050fa3944e57578e072d GIT binary patch literal 2276 zcmbtWS##S|6#lO5Sc;-7H8gaAR*=nhz;dAtG!7wYOG;9w4Rvuy`hqOqC~9QMqa~Rz z!+(Mo8er(~%#UI?T2e1@N~Y*Ry7%aOXS?4y(%=95{tCctJQwhZfRY3Os)+IkZsAiA zp9#1v;SLrAd>)Fv5O7z-qKJxw6qY1>iDeOAiC7V_D&n4q`w|}DYYD?xlW+wO1$-l5 zoguqq+NQtEkj@wC44L~*lQWE0O`AUsS`F?#(;5~@a#csyth(l!;XV;&{C(47xL4(Y z+O)ZEnGLnM-&T+IRI}A~T%Wt@o~CO}u5R;2jk|}Y&Nm;_e8=T$k(}wn{1n=~!F2!3* zvkkT8yQXba&PtZa-*C+|Y|Rf`O2zF<<$^~3U&2#;PN{xTJ)_W{G+uDsr!apyE4)CN zlXL7jz@-b|dJeu4mPx8PfvfX1Gi2D*n<-KX<7ZfW1J%|e&TTvH!Jg$DJq?IVExy%t z@@gMgcUmpSR?kJqxQ6QtH)ArFT1_uxZpn*@T(YPoeABT7Y{+3tt7cB ziK=p{x~SNW@|uiF+jS1jCT}h(%G``M$1u@DI(obK2*@rnx4K~E>8bGa8- zzDLdmIw_u~Hpu4HGsQBO*5=B4YN^w zpX&#!t`W58>=+99-L9rihSc*eQPTY3uumJD5VL~3}S?%ZC_naVf4YfHX!N^ zy4DMIqRJKe&N5u3Pj8x@)ctgSpLQxe89j%JFOmL*7Abr{_iQA*jvKU=V+GuVLOT}n zjH#&cUu@j8vCOXSY zon@}Gd?)vAtb6$-CVwU$=_sxnlm^KEBxX>=Ed43WVFh^_QU%mW62?0j$CSiN(P0YH z6!#3hi*pGjvm}`#NecOiRm!PA`!J_6Ny416#ra?HUg-I}iewWHsl-kZ^O5$){{kJe BjduV5 literal 0 HcmV?d00001 diff --git 
a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class new file mode 100644 index 0000000000000000000000000000000000000000..8ffb1d81bbaa055bdb85526f13ca1a7f7eebc830 GIT binary patch literal 2839 zcmb7G?OWSa6#w1PH6_b7${38z&7xCV7zBqelQK3|nQLWZQUvimZEj0gn)FFhK>erm zgP`&}>SzBUAHVo`?o9{*Isr;Rn8TwB zU??g0P^`?0;jtJ#Qjx|J1!WZ%P!S&=i(x^*Qw7fyEUL(&lEHKF{v?AX@vf@q!?J=; z6|5*&Wk}Chjup%@T+c6T7@LM}8%|TN2A<_Kixv7S7giWjCD-H(mlrIDKkKw=+*>wk zHm&q8xOKx`F+59rM~kUo-SQb8EbxwQIy|tgnr^PQ^{s|(wc4&1a8GX-b;IQPDz8@VMWxgDMXWab1s>yYOUV;&Y?PtJe1f04psvnk8>UH0eF}!YI#rr?%(iPeWO9Rh+^I`;&nLVHw(l73 zClTikDG-dI)jkjOx@~bM&_iwHdP#mq>|_cfhYrawjlj@d85v(hM)z8lo`_TwyZP-p zZwq)y#>H^E3}o4))MburukWkAxpSYqJ1wy#%=?JYk_A_{PiNAsL2_o;Qy=mThv6;dWH@RAM=g*Im!_a}C$ah54V8^^()Dffo#!T*IJX znj;!)w1jtbmfb(us{aW+!;_QwPnX;bQzwf$mU|3i$@(nWhVK`T*J0d4)F!9nR-W|? z1Nr1>7|N!Cy>x)~g6lLHlug6#@Wlqhm3$@1BFMyGCQXPcKVgI1b@V$FbEZRffNYNs zh=aqwCtslBe<(>j)J=#^UFc?#-leUi^W{oKbXnR?BXzTVoc2qCVIrSAN{&>~kpe0T zDZQjLJl+}FAxLNNxX@LbC%aUlk?eP|7@ANc#U*XD+uWhzAMX;NWE{hoMMXu7nZON~ zj^;=d-0saa5#r@_&)t%(wXlK_x+e|O?ddvQtA}oZbrPpwO75l@WT2*=s~$a&{YU$lY1INwbt(dy8gi zIm^>5BWDGSl3Xes+(C~tjX=R)*6%xb&mg+PAicEUowVU)jOH7(M-0Y1qNwaC#R*e@Js5(xvh; zsTWS(l1_~hPGe-w9gNfO15D5k9VJZ0j$cE8C==w+I0lF|Nqi*&;BDNMPVyKQj;RP1 rPS@EG7LIo%Ebo!V4A|e8Q}90S(N~Hr>LZOJKGXDsvm$2X%KiTUKh8j3 literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class new file mode 100644 index 0000000000000000000000000000000000000000..b6a1ceca93d3028b3983ecba52256417b6df7a5c GIT binary patch literal 2314 zcmbtWS$Eq+6#m9`B1KV_8XCGl5wwk$faO9PAWlO=N=j0<4Rvuy`hqM^6g9Hsqa``v zjei6$G{B*!&-^G3SBvo)8cy^e&0OE_Zu8wc{_F4GUjev{=K?+!P?8`(6;YnVEqo&4 
zQvtUn+`*!N&qC4X0=^KjB%&fAg=GnMu_EG25%)x_iug*znuPoKTEYa@C0xP-0pAGN zU>ILEZPQ<2NaqW6hRmANzZR)f3Gw1!2JY}L^N)~h^F zn>P0?v!OQk+v?GtYPQ;r>vLD#({!!L)otFWare;F`IE*AuKP9Lart`m8@*|BTw$0r zZO_+i-_(3^xs|WJ&<-^<@J&m7q_r!B^Z6kMQd@Ji7N@u88R;SX3CVpJQ$Y#gMJ$k54RonV9#=no(4p&7T@Y& zfjR)zomR`S)l(5NuHY)ejhNsi-LZXc`z0r0b;%29U9zZGeABT7Y|40uD#Kb}AK1>3 zt;Bjtq^EdsM<^ZrCB=4>e<`Z8UFXnj^5&AF{Gq`8|NVH1dDwhRp%lIBU;aTp=_y8X=+&@ z-Vb_#rw+^5QqQFCGCjdCOm}5f$2MrXQmc48UiTF!ffP0wE_6W~)Tst{8N>)j+kv`v zV00~B84>j+JsX8O(d81|#tfI~f=<(y<^jFmrJYJ&M&GgGOQe6HMGEiHdpr_e#Wh;X zu>#(QLOT|6kF-MYD@@Z0_+cTN`3++!WXCgqLfA=XGqn?le_&!K`bA3NH72uic52}S z(7J0(otO3C=E~qbGU&bZqomWdE7&eCRQGG6ryiU% literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class new file mode 100644 index 0000000000000000000000000000000000000000..092c87c70147da720622f46f556048e99cf9949b GIT binary patch literal 2610 zcmbtW+j84f6kYNqavdd2?A|Yw-i+%uRiQUZLaAfB)Wu2Lq$IrnMV1vusVu3o1YrjL zf|v3Gyzl@sG@XGN9$|)$VpvB~Y}J7oraVMCd+&4hUVE=~L2 zGm0K~B4u3e;ErAMfoXLNk6Uw|p*vou*^w|dXWFKJT|&BiW<^4#=5$zdcgwVm#h}+V zy!%?)qNmVubj@1PJX6@QGvlwD?BH_C2-J>k_?FpLJL|6c<(g{tT*vbbPwjQ0V(LPD zDXe-+vpZG@V}j%H{JNtDJ>r&-Z*1y@>zkaL42>Oy@rGqY;d|vBEzSNXkK=XK&7P(9 zwN16^lb?3r8~s_)Z#wWyj@uH=EowbBntc|%^V(pigafUA$w8V!&K5K`o=M)YI|Inq zUK6r_HqJze+34$>s^Wv4j(-+ig*Hve|F+T>q=^LqQTrI6^ChJguV%dz?OM zhw(SV_;K2lSh%K~_|7p#P4f8`t?Nj01x!_5LaO|V)WerZzd+`v@I;wU@e2YJkYS}W zTq)6hRB)D-gmcW53Uk%sT+_^xWuCDYkSo#)Tl`GGE%PmjNBhMSvgn87sg*gd5*~hYDVRATbNiC71I##_l4&wkO{Yl84gD! 
z*tf&Vm!h0;WoD;ej8?4P;Yu`K#zn3K>N0<&FB8WVt}{rzkz=+Y)~k3o#`-;@8AhrR z);R%c3zMq}%qO=o7pAr_%?+6?6t+;L!38HK#ujoAHE{@IOn8M&T*Yn{I*e zhI%f%(pnYs+%Ckf@-14+vUu)CE>a{FoUL#!C0fbG)l%Yf86W literal 0 HcmV?d00001 diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class new file mode 100644 index 0000000000000000000000000000000000000000..254a7f64781dfaab98dcc63c9bf33ff59a18d331 GIT binary patch literal 9081 zcmcgy349#YdH;Va$+KFGe5@|7v0;r3mV9V2K7h3`*piF|vTP*zkU7kFcO)&^-C1^a zeQ*Q<<_Hi72_&(TQX0~_G_(*BWH~OF^dKd@(ks31v`yQz^iDzp?*GlqO0&{Rr2N|G z_w(#~Z{Byl?|R?(>+gN#bpY$-gof8NydFX$&S`ifgle3tMF?Nj@CzYm?&KFW{8A9V ztl`aCd=0-6f{wT9@OAuZ2)~A358^k} z?md70yoNsv;*Wy(;}90(PeS-p{8R+g@NN9P zdi;ZW{G*2Ngs=+dLii{Aa~)RT?K=Dm{4d2!9y%09w`ys5y zJ0bixexQKg4N-{qLO3rZbY7%Z0#c<$)#_0bk~yN0KM7I~QmaWQgbthwN*#YiSC9IT z%$0dTnXe`bf)Wl&Lr@y4AP+qkl%}9G2c@Nk`sH8Mx7DVFL5V1!MM1eh5m>Crk`M;v zLPhAJphVTnr9ruvg2^(Tf-KkMl8{^~D>P{h!H~9)tdv!1+iFeL2*Mq^_Vf;P^&RZp zy0h!xz^;L%>u-_T- zW%=Xmi}e~AGuhz*ZBM057m!5itYRToEbO-O1zRxBm9$Oj!A3Y-eYG!OEL` z&dl!i-jjv;VYASkom9et*0yp%xu<{X+W(30b4huZU;kD4Vpd?ZP8G;IPDhVQecMvm zBn{Etw&SdRWruQ83CJEHt4~T57{h?opHn6J42V=|}}_Q~96q^yx3^v;~*? znVH!R7@Vnf#sU>PCQrCKX?OI~Cg!;k?lg|}nAzdN2u-6DXas9!;HqMXp*EY#9hUy? 
zIqg0_uQMrbnoCdkv&~4EG3=P zU6Yt5modF+ct(@8%r4BGnq02QI;Kob)>EBbM-ygF`IxQA2EoJszn=R;N~HX|mZ7C& zs&};dRqmO`kR4NY+JDS0n3+v89Padh)8YJt-v_d; z;O0cy%T=L%tC&xiN`Hb2Dkqk9r8_~aa$??R~u_6*_Db1hetwi>j59tdm&W;6kgotTKBJ zT@?wPB4H~ds>*b#P|)NGU9Oa?D4xXWlDce?&6-@T%NE?rNaMza_Ds@N3BEm*wA&N* zVO_3~YX#R7v!hw-NH*%dj;4}~>8YWVnO_~vn|Bmbc{3TE2#e7bm)I+!DLa};*)~(` zQo($OGbd#4I&{04C}^@(mu=FaOQ&>|y?5(F!9`P8a%FGN7)R4iWYuN6TvyVygIN`L zEc*njDr=`=m=)A`MQVLn$*B~zLz8ZuY3v5pc+2O621H#3WREU;WuGqlWl*qgiVS`nK2zy)N--}1YjQxB zo8)F)ZjoCxIjGBRVlc?+a!3-oB*oNaNN&(&SeTevRN1Vm2v!O)r+9N+Qqbs}R?6~f z^*)ouOI|4?r5?94cyPVC~swG zeX1~xM-~1L>vF$jHF-dn<3gjpsAR70rYKD(l__STuKh$?70iRQptgAQh+&gN!Zed6 zk+D=(cC9c+SsWB-?D3GGb=WFI%}lOvEXwq6Bw41Eb~(IwV~Z8JfgKCI+eRT|_5UlrEbpf$gf(6Tw0FQAPVg%epgVwo~;?(CzL; zr=9e3X19_7q%I$mrv$5~SggO~qAg8k%H2j`gtdV#A6KC#;=8>~<~OxR8QUi` zd0LleN5m zPLZcpzZ@RvjFz}vcpp@>xb;))*q;u)l-kekPKp(! zbB*?NQAZ0=EvA%FV41nhldj~*m94KXYaZ!#z!HSH)azQz?#JYCfo&F4kH}_tH@6-! zRJW(P^?BWedW~Q=Z zo#v2HOczRx3WchUj_WE+b!y7l_V}PL@#H276K!IocW1L^zJuA^HmUA7Wu9W6?-OU2 zd#I?#jUNnB`7C>GWWCStv*j-S!MW0ie4CZzc}-30(6K87OzQy5N|Rpv!Bj}HHHTEyDY*Q9eS!p@W8`vXme6H3?8^rVe=(O+ZF7^-E%b9fQB^wbSZeQi9sDvqlw@Rts%A7~$*(+)HJU0NX;)<_v$T1d zLa65YBdSC1bP%1E+qP-u;idM9_d9>hQKCZcSG^s#ZOrR!r93sIw^K1gsUp?U!)yg7 z1DI-PI(K^73=P7`MVXnTaMJa(dbzAiT~=`UljlsesB$n0?0y!ZOZ!jMiDKnBxa=be zmhy8{BLu_z?6w#qNO62SzaRy$g!56~{6fytzIh$LvGKCUy^CNWHv!j+uxgZWJ*@g7 zTrWEK4r30-?7#4Nj4Pw4;Rsl}@-(Veo<;TGY1F)mIUH&n246+($}gifTz3}w;47#< zjk#wquL}FV=%Blk&t{?kHuLl02F$}1WaMgGhAp^@tGbIEcjF$;1Ruh^6iN|p=S&f< z9>@Hb`5S=3rI-UceX!LYtjqp=eE;DI95D|^jbd>sl5QW8nrTSx#{;NRjMO|zqKim> zd`)=48H57}H&nld#=)xag8p$dIjhY9yn&W@^_uWPXUhpRIzxAT9FbF~316_W<}4Nu z*1U!m0x#(wtPWq;e;OCX=R_!E^h98Jcxj}13>TjaEQ(amUpLf<||ygISTix(TyBYiOqA(M)YOI zFc98T!m`&LwJc1>Mh2gw;7|pi%}7z zWg#+h5t~jcU`ZQt(vCZ19UHY*!IsVJ%U;9vZ750)8xBJ_CMn!0Iou_W;cj`Bzn{l_ z@;dI9ui^oD3&-U5cUC-G$91w0iv zjZZlKK87&)xDLn3Ukl!rMR<^-X1paGc!;A$JTA}SVLZZ!`;>eJAHk!j!?W@P9;3WE zPRWD#D4sw)UX=UrBtC|@cuDS|9&(tE`vO(?I4K2D49vyTc!u$sk;98FYV=8Jiy@4o 
zPvZoGni`#C{H?;Wz%%#^IjBZQpoq^BUkwffZpDvLw{!60JdtMZcPTta3NGYN5b|?; z_boL20M}@Eeyf~k*lN)5c@00Q;ioj5;;Lx)X$>!E_!$jf(D0&$pVjbl8eXdAn-_RO zco+OG^t^_b-$mXTJpT?Nfq3K{T>pIpzK5Fg{QnW5+0cThzEF1^A3ON(JQ@(0bvwD9 z{3C=5UwxiZY83#1OQ4cUHTCz3m#x0U8QZ>Yq^1Sda0Dzl3uAEA>MdIhh9LI-5^WxhE0fVW@l*brF;fjOld@r`3URq>e5y!NR!5!WkaLS0>WsQfDui k5E-I}zJzUxWAUY~wM222YXSVc+`-=v-=4?UXzR8A14^=FeEKg(OGyIlf&@Y+RUfE8Af*&YRSEI5vA2#Z+iN*C&@bS3 zKmrwk#0T(Eh*{S(iRnWg;KA$NncXvU&dl!5U*CQJxQDwIOstx?X`zJGF^r;Wp@zDN zTPALsSYsGhX%eN$3l%3_hNXm^vrT0Se^11oUgt(5>3Ed&!(!NX``in3JQ?%AiKVS#hi|LURj;!7 zv)oDXCtR643wsL8RBMAQX-}(Y`zOpNDe_Lg_~kea8AP?nre8#@-6Jn_NbGK-sbZIl zNZ1|8JHk;)3m)$CEgC#GjYZ6`UXf3NB8u^zv-Wu$3#O{_C~_`l)}nU|K>YN<4~PoGQlgjgL#Grg?lw@gELI`blYhkNIPM;S^yj9uUOf# zOQo9YojcK(VZN8>mhydq)6z4XkwKIkQr=jhN5%wM=$q9f=s!-6Q3(|oG$+0^PLX93 zWtsug^&_zQca&Zqq5K8Lr)>8$jr!F>c5V_g*(}>nxO22(IE#7Op|RF!rG3U%jC`bj z3HtnKW}EFhYvgB-Gg=`3;t2-NF-Gd2VH80Y`#?ti0$IX2BBK*|qm!VIX%^+aEY>oB x`mr+y&>YU=LY5`NMO@1GGAtAGGA + + + + mdstore_facade_factory_classname + eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory + ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade + + + mdstore_service_location + $UNDEFINED$ + mdstore service location + + + mdstore_ids_csv + $UNDEFINED$ + comma separated mdstore identifiers + + + mdstore_record_maxlength + 500000 + maximum allowed length of mdstore record + + + output + ImportedRecord avro datastore output holding mdrecords + + + output_report_root_path + base directory for storing reports + + + output_report_relative_path + import_mdrecord + directory for storing report (relative to output_report_root_path) + + + dnet_service_client_read_timeout + 60000 + DNet service client reading timeout (expressed in milliseconds) + + + dnet_service_client_connection_timeout + 60000 + DNet service client connection timeout (expressed in milliseconds) + + + resultset_client_read_timeout + 60000 + result set client reading timeout (expressed in 
milliseconds) + + + resultset_client_connection_timeout + 60000 + result set client connection timeout (expressed in milliseconds) + + + report_properties_prefix + import.mdrecord + report entry related to total number of imported records + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + + + + + + + + + + + eu.dnetlib.dhp.common.java.ProcessWrapper + eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter + + -Pimport.mdstore.service.location=${mdstore_service_location} + -Pimport.mdstore.ids.csv=${mdstore_ids_csv} + -Pimport.mdstore.record.maxlength=${mdstore_record_maxlength} + + -Pimport.resultset.client.read.timeout=${resultset_client_read_timeout} + -Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout} + -Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout} + -Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout} + + -Pimport.facade.factory.classname=${mdstore_facade_factory_classname} + -Omdrecords=${output} + + + + + + + + + eu.dnetlib.dhp.common.java.ProcessWrapper + eu.dnetlib.dhp.common.report.ReportGenerator + -Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']} + -Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']} + -Oreport=${output_report_root_path}/${output_report_relative_path} + + + + + + + Unfortunately, the process failed -- error message: + [${wf:errorMessage(wf:lastErrorNode())}] + + + diff --git a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 000000000..672248f22 --- /dev/null +++ 
b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,20 @@ +eu/dnetlib/dhp/wf/importer/RecordReceiver.class +eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class +eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class +eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class +eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class +eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class +eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class +eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class +eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class +eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class +eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class +eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class +eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class +eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class diff --git a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 000000000..e9820d1d9 --- /dev/null +++ b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,20 @@ +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java 
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java 
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java diff --git a/dhp-wf/pom.xml b/dhp-wf/pom.xml new file mode 100644 index 000000000..4c0aa666a --- /dev/null +++ b/dhp-wf/pom.xml @@ -0,0 +1,249 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + + dhp-wf + pom + + + dhp-wf-import + + + + yyyy-MM-dd_HH_mm + + + oozie-package + + src/test/resources/define/path/pointing/to/directory/holding/oozie_app + oozie_app + default + default + default + primed + + runtime + + true + + ${user.home}/.dhp/application.properties + + ${maven.build.timestamp} + + ${project.version} + true + + + + + + + org.apache.oozie + oozie-client + + + net.schmizz + sshj + test + + + + + + oozie-package + + + + org.apache.maven.plugins + maven-enforcer-plugin + 1.4.1 + + + enforce-connection-properties-file-existence + initialize + + enforce + + + + + + ${dhpConnectionProperties} + + + The file with connection properties could not be found. Please, create the ${dhpConnectionProperties} file or set the location to another already created file by using + -DdhpConnectionProperties property. 
+ + + + true + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy dependencies + prepare-package + + copy-dependencies + + + ${oozie.package.dependencies.include.scope} + ${oozie.package.dependencies.exclude.scope} + true + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + attach-test-resources-package + prepare-package + + test-jar + + + ${oozie.package.skip.test.jar} + + + + + + + + pl.project13.maven + git-commit-id-plugin + 2.1.11 + + + + revision + + + + + true + yyyy-MM-dd'T'HH:mm:ssZ + true + target/${oozie.package.file.name}/${oozieAppDir}/version.properties + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.0.0 + + + assembly-oozie-installer + package + + single + + + false + ${oozie.package.file.name}_shell_scripts + + oozie-installer + + + + + + + + + + maven-antrun-plugin + + + + installer-copy-custom + process-resources + + run + + + + + + + + + + + package + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + run + + + + + + + + + + + diff --git a/pom.xml b/pom.xml new file mode 100644 index 000000000..8861cbe25 --- /dev/null +++ b/pom.xml @@ -0,0 +1,953 @@ + + + 4.0.0 + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + pom + + http://www.d-net.research-infrastructures.eu + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + dhp-build + dhp-common + dhp-schemas + dhp-wf + + + + Redmine + https://issue.openaire.research-infrastructures.eu/projects/openaire + + + + jenkins + https://jenkins-dnet.d4science.org/ + + + + scm:git:ssh://git@github.com/??? + scm:git:ssh://git@github.com/???.git + https://github.com/??? 
+ HEAD + + + + + + + + + dnet45-releases + D-Net 45 releases + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + default + + false + + + true + + + + dnet45-bootstrap-release + dnet45 bootstrap release + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-bootstrap-release + default + + false + + + true + + + + + cloudera + Cloudera Repository + https://repository.cloudera.com/artifactory/cloudera-repos + + true + + + false + + + + + + + + org.slf4j + slf4j-api + 1.7.22 + + + org.slf4j + slf4j-log4j12 + 1.7.22 + + + log4j + log4j + 1.2.17 + + + javax.servlet + javax.servlet-api + 3.1.0 + runtime + + + junit + junit + 4.12 + test + + + org.hamcrest + hamcrest-all + 1.3 + test + + + org.mockito + mockito-all + 1.10.19 + test + + + org.powermock + powermock-core + 1.6.6 + test + + + com.google.code.findbugs + annotations + 3.0.1 + provided + + + com.google.code.findbugs + jsr305 + 3.0.1 + provided + + + + + + + + org.apache.hadoop + hadoop-common + ${dhp.hadoop.version} + provided + + + servlet-api + javax.servlet + + + + + + org.apache.hadoop + hadoop-yarn-common + ${dhp.hadoop.version} + provided + + + org.apache.hadoop + hadoop-mapreduce-client-common + ${dhp.hadoop.version} + provided + + + + org.apache.hadoop + hadoop-yarn-common + ${dhp.hadoop.version} + test + test-jar + + + + org.apache.hadoop + hadoop-mapreduce-client-common + ${dhp.hadoop.version} + test + test-jar + + + + org.apache.hadoop + hadoop-mapreduce-client-app + ${dhp.hadoop.version} + provided + + + servlet-api + javax.servlet + + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${dhp.hadoop.version} + provided + + + servlet-api + javax.servlet + + + + org.apache.calcite + calcite-core + + + org.apache.calcite + calcite-avatica + + + + + + org.apache.hadoop + hadoop-hdfs + ${dhp.hadoop.version} + + + + org.apache.hadoop + hadoop-hdfs + ${dhp.hadoop.version} + test-jar + test + + + + + + org.apache.oozie + oozie-core + 
${dhp.oozie.version} + + + + servlet-api + javax.servlet + + + + org.apache.calcite + calcite-core + + + org.apache.calcite + calcite-avatica + + + + + + org.apache.oozie + oozie-client + ${dhp.oozie.version} + + + + slf4j-simple + org.slf4j + + + + + + org.apache.spark + spark-core_2.10 + ${dhp.spark.version} + provided + + + + org.apache.spark + spark-sql_2.10 + ${dhp.spark.version} + provided + + + + com.databricks + spark-avro_2.10 + 1.1.0-${dhp.cdh.version} + + + + com.databricks + spark-csv_2.10 + 1.5.0 + + + + pl.edu.icm.spark-utils + spark-utils + 1.0.0 + + + + org.mongodb.spark + mongo-spark-connector_2.10 + 2.2.1 + provided + + + + + + org.apache.avro + avro + ${dhp.avro.version} + + + + org.apache.avro + avro-mapred + ${dhp.avro.version} + hadoop2 + + + + servlet-api + org.mortbay.jetty + + + netty + io.netty + + + + + + + + org.apache.pig + pig + ${dhp.pig.version} + provided + + + org.mortbay.jetty + servlet-api-2.5 + + + servlet-api + javax.servlet + + + + + + org.apache.pig + piggybank + ${dhp.pig.version} + provided + + + + org.apache.pig + pigunit + ${dhp.pig.version} + + + + + pl.edu.icm.cermine + cermine-impl + 1.13 + + + + + pl.edu.icm.coansys + models + ${dhp.coansys.version} + + + + pl.edu.icm.coansys + citation-matching-core-code + ${dhp.coansys.version} + + + + org.apache.avro + avro-mapred + + + org.apache.hadoop + hadoop-client + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-mapreduce-client-common + + + org.apache.hadoop + hadoop-mapreduce-client-app + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + + org.slf4j + slf4j-simple + + + + + + + + eu.dnetlib + dnet-openaireplus-mapping-utils + [6.0.0, 7.0.0) + + + + org.apache.hadoop + hadoop-core + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.zookeeper + zookeeper + + + jgrapht + jgrapht + + + + + + eu.dnetlib + dnet-objectstore-rmi + [2.0.0, 3.0.0) + + + + eu.dnetlib + cnr-rmi-api + [2.0.0, 3.0.0) + + + 
+ eu.dnetlib + cnr-resultset-client + [2.0.0, 3.0.0) + + + + org.springframework + spring-web + + + org.springframework + spring-webmvc + + + + + + eu.dnetlib + dnet-actionmanager-common + [6.0.0, 7.0.0) + + + + apache + commons-logging + + + + + + eu.dnetlib + cnr-service-utils + [1.0.0, 2.0.0) + + + + + com.beust + jcommander + 1.60 + + + + com.google.code.gson + gson + 2.8.0 + + + + com.googlecode.json-simple + json-simple + 1.1.1 + + + + org.apache.commons + commons-lang3 + 3.5 + + + + org.apache.commons + commons-collections4 + 4.1 + + + + com.thoughtworks.xstream + xstream + 1.4.9 + + + + xalan + xalan + 2.7.2 + + + + xml-apis + xml-apis + 1.4.01 + + + + org.jdom + jdom + 1.1.3 + + + + org.jsoup + jsoup + 1.10.2 + + + + net.sf.opencsv + opencsv + 2.3 + + + + com.googlecode.protobuf-java-format + protobuf-java-format + 1.2 + + + + com.google.protobuf + protobuf-java + 2.5.0 + + + + + com.google.guava + guava + 12.0 + + + + commons-cli + commons-cli + 1.3.1 + + + + commons-io + commons-io + 2.5 + + + + de.sven-jacobs + loremipsum + 1.0 + + + + net.schmizz + sshj + 0.10.0 + + + + + org.bouncycastle + bcprov-jdk15on + 1.50 + + + + + com.thoughtworks.paranamer + paranamer + 2.8 + + + + + com.linkedin.datafu + datafu + 1.2.0 + + + + org.apache.cxf + cxf-rt-frontend-jaxws + ${cxf.version} + + + + + com.sun.xml.bind + jaxb-impl + 2.2.7 + runtime + + + + org.springframework + spring-beans + ${spring.version} + + + + org.springframework + spring-context + ${spring.version} + + + + org.scala-lang + scala-library + ${scala.version} + + + + commons-beanutils + commons-beanutils + 1.9.3 + + + + org.apache.curator + curator-test + 3.3.0 + test + + + + + + + + target + target/classes + ${project.artifactId}-${project.version} + target/test-classes + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.6.0 + + 1.8 + 1.8 + ${project.build.sourceEncoding} + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.0.2 + + + + org.apache.maven.plugins + 
maven-source-plugin + 3.0.1 + + + attach-sources + verify + + jar-no-fork + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.19.1 + + true + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.4 + + true + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.0.0 + + + + net.alchim31.maven + scala-maven-plugin + 3.2.2 + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + + + org.apache.avro + avro-maven-plugin + 1.7.7 + + + + org.codehaus.mojo + build-helper-maven-plugin + 1.12 + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + + org.apache.avro + + + avro-maven-plugin + + + [1.7.4,) + + + idl-protocol + schema + + + + + + + + + + org.codehaus.mojo + + + build-helper-maven-plugin + + + [1.7,) + + + add-source + + + + + + + + + + org.apache.maven.plugins + + + maven-plugin-plugin + + + [3.2,) + + + descriptor + + + + + + + + + + + + + + + + + org.apache.maven.plugins + maven-release-plugin + 2.5.3 + + + + org.jacoco + jacoco-maven-plugin + 0.7.9 + + + **/schemas/* + **/com/cloudera/**/* + **/org/apache/avro/io/**/* + + + + + default-prepare-agent + + prepare-agent + + + + default-report + prepare-package + + report + + + + + + + + + org.apache.maven.wagon + wagon-ssh + 2.10 + + + + + + + + + dnet45-snapshots + DNet45 Snapshots + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots + default + + + dnet45-releases + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.4 + + true + + + + + + + UTF-8 + UTF-8 + [4.2.5.RELEASE] + [3.1.5] + + cdh5.9.0 + + 4.1.0-${dhp.cdh.version} + 0.12.0-${dhp.cdh.version} + 1.7.6-${dhp.cdh.version} + 2.6.0-${dhp.cdh.version} + 1.6.0-${dhp.cdh.version} + 2.10.6 + + +