2019-03-18 10:44:35 +01:00
<?xml version="1.0" encoding="UTF-8"?>
2019-10-31 12:08:49 +01:00
<project xmlns= "http://maven.apache.org/POM/4.0.0" xmlns:xsi= "http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation= "http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd" >
2019-10-17 18:38:30 +02:00
2019-03-18 10:44:35 +01:00
<!-- POM model version declared by this descriptor -->
<modelVersion>4.0.0</modelVersion>

<!-- Parent POM: eu.dnetlib.dhp:dhp, resolved from the directory one level up -->
<parent>
  <groupId>eu.dnetlib.dhp</groupId>
  <artifactId>dhp</artifactId>
  <version>1.1.7-SNAPSHOT</version>
  <relativePath>../</relativePath>
</parent>

<!-- groupId and version are not declared here, so they come from the parent -->
<artifactId>dhp-workflows</artifactId>
<packaging>pom</packaging>

<description>This module is the container for the oozie workflow definitions in dnet-hadoop project</description>
2019-03-18 10:44:35 +01:00
<!-- Child modules aggregated by this POM, listed in their declared order. -->
<!-- NOTE(review): 'dhp-worfklow-profiles' looks like a misspelling of 'workflow'; it presumably
     matches the directory name on disk, so confirm before renaming. -->
<modules>
  <module>dhp-worfklow-profiles</module>
  <module>dhp-aggregation</module>
  <module>dhp-distcp</module>
  <module>dhp-actionmanager</module>
  <module>dhp-graph-mapper</module>
  <module>dhp-dedup-openaire</module>
  <module>dhp-bulktag</module>
  <module>dhp-propagation</module>
  <module>dhp-graph-provision</module>
  <module>dhp-dedup-scholexplorer</module>
  <module>dhp-graph-provision-scholexplorer</module>
  <module>dhp-blacklist</module>
  <module>dhp-stats-update</module>
  <module>dhp-broker-events</module>
</modules>
2019-10-17 18:38:30 +02:00
<!-- Additional repository consulted when resolving build plugins. -->
<!-- NOTE(review): the URL uses plain http; recent Maven releases block external http
     repositories by default. Confirm whether an https endpoint is available. -->
<pluginRepositories>
  <pluginRepository>
    <id>iis-releases</id>
    <name>iis releases plugin repository</name>
    <url>http://maven.ceon.pl/artifactory/iis-releases</url>
    <layout>default</layout>
  </pluginRepository>
</pluginRepositories>
<properties>
  <!-- Format of ${maven.build.timestamp}, which is reused below as output.dir.name -->
  <maven.build.timestamp.format>yyyy-MM-dd_HH_mm</maven.build.timestamp.format>

  <!-- Default Oozie installer properties that are required to be defined at pom.xml level. -->
  <!-- Other project properties are defined in project-default.properties. -->
  <oozie.package.file.name>oozie-package</oozie.package.file.name>
  <!-- notice: sandboxName is generated based on the workflow.source.dir property -->
  <workflow.source.dir>src/test/resources/define/path/pointing/to/directory/holding/oozie_app</workflow.source.dir>
  <oozieAppDir>oozie_app</oozieAppDir>
  <queueName>default</queueName>
  <importerQueueName>default</importerQueueName>
  <oozieLauncherQueueName>default</oozieLauncherQueueName>
  <primed.dir>primed</primed.dir>

  <!-- Scope filters handed to the copy-dependencies execution of the 'oozie-package' profile;
       the 'attach-test-resources' profile overrides them -->
  <oozie.package.dependencies.include.scope>runtime</oozie.package.dependencies.include.scope>
  <oozie.package.dependencies.exclude.scope/>
  <!-- Value of the 'skip' flag of the test-jar execution in the 'oozie-package' profile -->
  <oozie.package.skip.test.jar>true</oozie.package.skip.test.jar>

  <!-- Default location of the connection properties file; can be overridden with -DdhpConnectionProperties -->
  <dhpConnectionProperties>${user.home}/.dhp/application.properties</dhpConnectionProperties>
  <output.dir.name>${maven.build.timestamp}</output.dir.name>
  <projectVersion>${project.version}</projectVersion>
  <oozie.use.system.libpath>true</oozie.use.system.libpath>
</properties>
<!-- Dependencies declared for this module. -->
<!-- Entries without a version presumably rely on dependencyManagement in the parent POM;
     verify there. -->
<dependencies>
  <dependency>
    <groupId>eu.dnetlib.dhp</groupId>
    <artifactId>dhp-build-assembly-resources</artifactId>
    <version>${project.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.oozie</groupId>
    <artifactId>oozie-client</artifactId>
  </dependency>
  <dependency>
    <groupId>net.schmizz</groupId>
    <artifactId>sshj</artifactId>
    <scope>test</scope>
  </dependency>
</dependencies>
<profiles >
<profile>
  <!-- Sets the properties required for test oozie workflows.
       To be used only together with the 'oozie-package' profile. -->
  <id>attach-test-resources</id>
  <properties>
    <!-- Replaces the default include scope ('runtime', set in the top-level properties) with an
         empty value and excludes the 'provided' scope instead, because test resources attached
         to the oozie package require the test dependencies.
         NOTE(review): an earlier comment said the include scope is set to 'test', but the value
         is empty; presumably copy-dependencies treats an empty include scope as "all scopes".
         Confirm against the plugin documentation. -->
    <oozie.package.dependencies.include.scope/>
    <oozie.package.dependencies.exclude.scope>provided</oozie.package.dependencies.exclude.scope>
    <!-- Do not skip creation of the test jar for priming (in the 'oozie-package' profile) -->
    <oozie.package.skip.test.jar>false</oozie.package.skip.test.jar>
  </properties>
</profile>
<profile >
<id > oozie-package</id>
<build >
<plugins >
<!-- Fails the build in the 'initialize' phase when the connection properties file is missing -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-enforcer-plugin</artifactId>
  <version>1.4.1</version>
  <executions>
    <execution>
      <id>enforce-connection-properties-file-existence</id>
      <phase>initialize</phase>
      <goals>
        <goal>enforce</goal>
      </goals>
      <configuration>
        <fail>true</fail>
        <rules>
          <requireFilesExist>
            <files>
              <file>${dhpConnectionProperties}</file>
            </files>
            <message>
The file with connection properties could not be found. Please, create the ${dhpConnectionProperties} file or set the location to another already created file by using
-DdhpConnectionProperties property.
</message>
          </requireFilesExist>
        </rules>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- Copies the module dependencies during 'prepare-package'; the scope filters come from the
     oozie.package.dependencies.* properties -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-dependency-plugin</artifactId>
  <executions>
    <execution>
      <id>copy dependencies</id>
      <phase>prepare-package</phase>
      <goals>
        <goal>copy-dependencies</goal>
      </goals>
      <configuration>
        <includeScope>${oozie.package.dependencies.include.scope}</includeScope>
        <excludeScope>${oozie.package.dependencies.exclude.scope}</excludeScope>
        <silent>true</silent>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- This plugin was originally defined in the 'attach-test-resources' profile.
     It was moved here to ensure that it executes before priming. -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-jar-plugin</artifactId>
  <executions>
    <execution>
      <id>attach-test-resources-package</id>
      <phase>prepare-package</phase>
      <goals>
        <goal>test-jar</goal>
      </goals>
      <configuration>
        <!-- Controlled by oozie.package.skip.test.jar -->
        <skip>${oozie.package.skip.test.jar}</skip>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- Priming step: runs the 'prime' goal during 'prepare-package' and writes its result to
     ${project.build.directory}/${primed.dir} -->
<plugin>
  <groupId>eu.dnetlib.primer</groupId>
  <artifactId>primer-maven-plugin</artifactId>
  <version>1.2.0</version>
  <executions>
    <execution>
      <id>priming</id>
      <phase>prepare-package</phase>
      <goals>
        <goal>prime</goal>
      </goals>
      <configuration>
        <classProviderFiles>
          <classProviderFile>${project.build.directory}/dependency/*.jar</classProviderFile>
          <classProviderFile>${project.build.directory}/*-tests.jar</classProviderFile>
          <classProviderFile>${project.build.directory}/classes</classProviderFile>
        </classProviderFiles>
        <coansysPackageDir>${project.build.directory}/dependency</coansysPackageDir>
        <destination>${project.build.directory}/${primed.dir}</destination>
        <classpath>${workflow.source.dir}</classpath>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- Reads property files into the project so they can be used in the .sh scripts -->
<plugin>
  <groupId>org.kuali.maven.plugins</groupId>
  <artifactId>properties-maven-plugin</artifactId>
  <version>${properties.maven.plugin.version}</version>
  <dependencies>
    <!-- contains project-default.properties -->
    <dependency>
      <groupId>eu.dnetlib.dhp</groupId>
      <artifactId>dhp-build-assembly-resources</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
  <executions>
    <!-- initialize: connection properties file, read with quiet=false -->
    <execution>
      <id>reading-dhp-properties</id>
      <phase>initialize</phase>
      <goals>
        <goal>read-project-properties</goal>
      </goals>
      <configuration>
        <quiet>false</quiet>
        <locations>
          <location>${dhpConnectionProperties}</location>
        </locations>
      </configuration>
    </execution>
    <!-- prepare-package: defaults from the classpath, read with quiet=true -->
    <execution>
      <id>read-default-properties</id>
      <phase>prepare-package</phase>
      <goals>
        <goal>read-project-properties</goal>
      </goals>
      <configuration>
        <quiet>true</quiet>
        <locations>
          <location>classpath:project-default.properties</location>
        </locations>
      </configuration>
    </execution>
    <!-- prepare-package: primed job.properties and job-override.properties, read with quiet=true -->
    <execution>
      <id>read-job-properties</id>
      <phase>prepare-package</phase>
      <goals>
        <goal>read-project-properties</goal>
      </goals>
      <configuration>
        <quiet>true</quiet>
        <locations>
          <param>${project.build.directory}/${primed.dir}/job.properties</param>
          <param>job-override.properties</param>
        </locations>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- Project-local plugin: generates properties in 'validate' and writes job.properties in
     'prepare-package' -->
<plugin>
  <groupId>eu.dnetlib.dhp</groupId>
  <artifactId>dhp-build-properties-maven-plugin</artifactId>
  <version>${project.version}</version>
  <executions>
    <execution>
      <phase>validate</phase>
      <goals>
        <!-- generates sandboxName based on workflow.source.dir when not specified as commandline parameter -->
        <goal>generate-properties</goal>
      </goals>
      <configuration/>
    </execution>
    <execution>
      <id>write-job-properties</id>
      <phase>prepare-package</phase>
      <goals>
        <goal>write-project-properties</goal>
      </goals>
      <configuration>
        <outputFile>target/${oozie.package.file.name}/job.properties</outputFile>
        <!-- notice: dots are not allowed for job.properties! -->
        <include>nameNode,jobTracker,queueName,importerQueueName,oozieLauncherQueueName,
workingDir,oozieTopWfApplicationPath,oozieServiceLoc,
sparkDriverMemory,sparkExecutorMemory,sparkExecutorCores,
oozie.wf.application.path,projectVersion,oozie.use.system.libpath,
oozieActionShareLibForSpark1,spark1YarnHistoryServerAddress,spark1EventLogDir,
oozieActionShareLibForSpark2,spark2YarnHistoryServerAddress,spark2EventLogDir,
sparkSqlWarehouseDir
</include>
        <includeSystemProperties>true</includeSystemProperties>
        <includePropertyKeysFromFiles>
          <!-- <param>${workflow.source.dir}/job.properties</param> -->
          <param>${project.build.directory}/${primed.dir}/job.properties</param>
          <param>job-override.properties</param>
        </includePropertyKeysFromFiles>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- Records git revision details in version.properties inside the oozie application directory -->
<plugin>
  <groupId>pl.project13.maven</groupId>
  <artifactId>git-commit-id-plugin</artifactId>
  <version>2.1.11</version>
  <configuration>
    <verbose>true</verbose>
    <dateFormat>yyyy-MM-dd'T'HH:mm:ssZ</dateFormat>
    <generateGitPropertiesFile>true</generateGitPropertiesFile>
    <generateGitPropertiesFilename>target/${oozie.package.file.name}/${oozieAppDir}/version.properties</generateGitPropertiesFilename>
  </configuration>
  <executions>
    <execution>
      <goals>
        <goal>revision</goal>
      </goals>
    </execution>
  </executions>
</plugin>
<!-- Assembles ${oozie.package.file.name}_shell_scripts in the 'package' phase using the
     'oozie-installer' descriptor reference -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-assembly-plugin</artifactId>
  <version>3.0.0</version>
  <dependencies>
    <!-- presumably provides the 'oozie-installer' descriptor; verify in that artifact -->
    <dependency>
      <groupId>eu.dnetlib.dhp</groupId>
      <artifactId>dhp-build-assembly-resources</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
  <executions>
    <execution>
      <id>assembly-oozie-installer</id>
      <phase>package</phase>
      <goals>
        <goal>single</goal>
      </goals>
      <configuration>
        <finalName>${oozie.package.file.name}_shell_scripts</finalName>
        <appendAssemblyId>false</appendAssemblyId>
        <descriptorRefs>
          <descriptorRef>oozie-installer</descriptorRef>
        </descriptorRefs>
      </configuration>
    </execution>
  </executions>
</plugin>
2019-12-06 13:38:00 +01:00
2019-10-17 18:38:30 +02:00
<plugin>
  <!-- Prepares the oozie installer package with Ant tasks -->
  <artifactId>maven-antrun-plugin</artifactId>
  <executions>
    <!-- process-resources: extract the shared assembly resources jar -->
    <execution>
      <id>installer-copy-custom</id>
      <phase>process-resources</phase>
      <goals>
        <goal>run</goal>
      </goals>
      <configuration>
        <tasks>
          <property name="assembly-resources.loc" value="${maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path}"/>
          <unjar src="${assembly-resources.loc}" dest="${project.build.directory}/assembly-resources"/>
        </tasks>
      </configuration>
    </execution>
    <!-- package: build the tar.gz installer -->
    <execution>
      <phase>package</phase>
      <goals>
        <goal>run</goal>
      </goals>
      <configuration>
        <tasks>
          <!-- 1. copy the workflow resources from the primed directory -->
          <mkdir dir="target/${oozie.package.file.name}"/>
          <mkdir dir="target/${oozie.package.file.name}/${oozieAppDir}"/>
          <copy todir="target/${oozie.package.file.name}/${oozieAppDir}">
            <!-- <fileset dir="${workflow.source.dir}/${oozieAppDir}" /> replaced with the primed dir location -->
            <fileset dir="target/${primed.dir}/${oozieAppDir}"/>
          </copy>

          <!-- 2. copy all dependency jars into the oozie lib directory -->
          <mkdir dir="target/${oozie.package.file.name}/${oozieAppDir}/lib"/>
          <copy todir="target/${oozie.package.file.name}/${oozieAppDir}/lib">
            <fileset dir="${project.build.directory}/dependency"/>
          </copy>

          <!-- 3. copy the jars built by the current module -->
          <copy todir="target/${oozie.package.file.name}/${oozieAppDir}/lib">
            <fileset dir="${project.build.directory}">
              <include name="*.jar"/>
            </fileset>
          </copy>

          <!-- 4. force LF line endings on scripts and data files -->
          <fixcrlf srcdir="target/${oozie.package.file.name}/${oozieAppDir}/" encoding="UTF-8" outputencoding="UTF-8" includes="**/*.sh,**/*.json,**/*.py,**/*.sql" eol="lf"/>

          <!-- 5. create the tar.gz package; shell scripts get mode 0755, other installer files 0644 -->
          <tar destfile="target/${oozie.package.file.name}.tar.gz" compression="gzip" longfile="gnu">
            <tarfileset dir="target/${oozie.package.file.name}"/>
            <tarfileset dir="target/${oozie.package.file.name}_shell_scripts" filemode="0755">
              <include name="**/*.sh"/>
            </tarfileset>
            <tarfileset dir="target/${oozie.package.file.name}_shell_scripts" filemode="0644">
              <exclude name="**/*.sh"/>
            </tarfileset>
          </tar>

          <!-- 6. cleanup of the staging directories -->
          <delete dir="target/${oozie.package.file.name}"/>
          <delete dir="target/${oozie.package.file.name}_shell_scripts"/>
        </tasks>
      </configuration>
    </execution>
  </executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
  <!-- Uploads the packaged oozie installer to the hadoop frontend host over ssh/scp, unpacks it
       there and runs upload_workflow.sh. All three executions are bound to the 'package' phase. -->
  <!-- NOTE(review): every ssh/scp call passes StrictHostKeyChecking=no, which disables host key
       verification; confirm this is acceptable for the target environment. -->
  <id>deploy</id>
  <build>
    <plugins>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.5.0</version>
        <executions>
          <!-- Step 1: recreate an empty remote target directory -->
          <execution>
            <id>create-target-dir</id>
            <phase>package</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <executable>ssh</executable>
              <arguments>
                <argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
                <argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
                <argument>-o StrictHostKeyChecking=no</argument>
                <argument>rm -rf ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; mkdir -p ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/</argument>
              </arguments>
            </configuration>
          </execution>
          <!-- Step 2: copy the tar.gz package to the remote target directory -->
          <execution>
            <id>upload-oozie-package</id>
            <phase>package</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <executable>scp</executable>
              <arguments>
                <argument>-P ${dhp.hadoop.frontend.port.ssh}</argument>
                <argument>-o StrictHostKeyChecking=no</argument>
                <argument>target/${oozie.package.file.name}.tar.gz</argument>
                <argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}:${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/${oozie.package.file.name}.tar.gz</argument>
              </arguments>
            </configuration>
          </execution>
          <!-- Step 3: unpack the package remotely, remove the archive, run the upload script -->
          <execution>
            <id>extract-and-upload-to-hdfs</id>
            <phase>package</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <executable>ssh</executable>
              <!-- <outputFile>target/redirected_upload.log</outputFile> -->
              <arguments>
                <argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
                <argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
                <argument>-o StrictHostKeyChecking=no</argument>
                <argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
                <!-- The archive name follows oozie.package.file.name so it always matches the
                     file uploaded in step 2, even when that property is overridden -->
                <argument>tar -zxf ${oozie.package.file.name}.tar.gz; </argument>
                <argument>rm ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/${oozie.package.file.name}.tar.gz; </argument>
                <argument>./upload_workflow.sh</argument>
              </arguments>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>
<profile>
  <!-- Runs run_workflow.sh on the hadoop frontend host over ssh and then prints the captured
       output; both executions are bound to the 'package' phase -->
  <id>run</id>
  <build>
    <plugins>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.5.0</version>
        <executions>
          <execution>
            <id>run-job</id>
            <phase>package</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <executable>ssh</executable>
              <!-- this file will be used by the test verification profile to read the job identifier -->
              <outputFile>${oozie.execution.log.file.location}</outputFile>
              <arguments>
                <argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
                <argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
                <argument>-o StrictHostKeyChecking=no</argument>
                <argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
                <argument>./run_workflow.sh</argument>
              </arguments>
            </configuration>
          </execution>
          <!-- Echo the captured run log on stdout -->
          <execution>
            <id>show-run-log-on-stdout</id>
            <phase>package</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <executable>cat</executable>
              <arguments>
                <argument>${oozie.execution.log.file.location}</argument>
              </arguments>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>
<profile>
  <!-- Handles the unit and integration test definitions of all child modules.
       Activated when src/main/resources/parent.marker is missing. -->
  <id>child-tests</id>
  <build>
    <plugins>
      <!-- Builds the test jar in the 'integration-test' phase -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <executions>
          <execution>
            <id>integration-test-package</id>
            <phase>integration-test</phase>
            <goals>
              <goal>test-jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <!-- currently disabled configuration:
        <configuration>
          <excludedGroups>eu.dnetlib.iis.common.IntegrationTest</excludedGroups>
        </configuration>
        -->
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-failsafe-plugin</artifactId>
        <version>${maven.failsave.plugin.version}</version>
        <executions>
          <execution>
            <id>default-integration-test</id>
            <configuration>
              <argLine>-Xmx1024m</argLine>
              <systemPropertiesVariables>
                <!-- Forwards the dhpConnectionProperties property, which this POM defaults to
                     ${user.home}/.dhp/application.properties.
                     NOTE(review): an earlier comment named ${user.home}/.dhp/integration-test.properties
                     as the fallback; this POM does not establish that, so confirm in the test code. -->
                <dhpConnectionProperties>${dhpConnectionProperties}</dhpConnectionProperties>
                <output.dir.name>${output.dir.name}</output.dir.name>
              </systemPropertiesVariables>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
  <activation>
    <file>
      <missing>src/main/resources/parent.marker</missing>
    </file>
  </activation>
</profile>
</profiles>
2019-03-18 10:44:35 +01:00
</project>