implemented import crossref job

This commit is contained in:
Sandro La Bruzzo 2020-04-01 14:12:33 +02:00
parent 36236dd1c1
commit 205e9521c6
14 changed files with 912 additions and 372 deletions

View File

@ -75,10 +75,10 @@ public class SparkCreateDedupTest {
final HashFunction hashFunction = Hashing.murmur3_128(); final HashFunction hashFunction = Hashing.murmur3_128();
System.out.println( s1.hashCode()); // System.out.println( s1.hashCode());
System.out.println(hashFunction.hashUnencodedChars(s1).asLong()); // System.out.println(hashFunction.hashUnencodedChars(s1).asLong());
System.out.println( s2.hashCode()); // System.out.println( s2.hashCode());
System.out.println(hashFunction.hashUnencodedChars(s2).asLong()); // System.out.println(hashFunction.hashUnencodedChars(s2).asLong());
} }

View File

@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven descriptor of the dhp-doiboost module, a child of the dhp-workflows aggregator. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.1.6-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-doiboost</artifactId>
<dependencies>
<!-- No version declared here: it is expected to come from an inherited dependencyManagement section. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</dependency>
<!-- HTTP client used to talk to the Elasticsearch REST endpoint; the version is pinned in this module. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.3.4</version>
</dependency>
<!-- NOTE(review): the CXF HTTP transport is excluded, presumably to avoid a classpath conflict; confirm the reason. -->
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.cxf</groupId>
<artifactId>cxf-rt-transports-http</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
<version>${project.version}</version>
</dependency>
<!-- JSONPath library used to extract fields from JSON responses. -->
<!-- NOTE(review): no version is declared; presumably managed by a parent POM, confirm. -->
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,65 @@
package eu.dnetlib.doiboost;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.http.HttpHost;
/**
 * Command-line job that dumps the records of the Crossref Elasticsearch index
 * into a Hadoop {@link SequenceFile}.
 *
 * <p>Every record returned by {@link ESClient} is appended as an
 * {@code (IntWritable, Text)} pair: the key is a zero-based progressive counter,
 * the value is the record exactly as returned by the client.
 */
public class CrossrefImporter {

    /** Classpath location of the JSON file describing the accepted command-line arguments. */
    private static final String ARGUMENTS_RESOURCE = "/eu/dnetlib/dhp/doiboost/import_from_es.json";

    /** Host of the Elasticsearch node that exposes the Crossref index. */
    private static final String ES_HOST = "ip-90-147-167-25.ct1.garrservices.it";

    /** Name of the Elasticsearch index to dump. */
    private static final String ES_INDEX = "crossref";

    /** Number of records between two progress messages. */
    private static final int PROGRESS_STEP = 100000;

    /**
     * Runs the import.
     *
     * @param args command-line arguments; {@code targetPath} is the sequence file to write and
     *             {@code namenode} is the URI installed as {@code fs.defaultFS}
     * @throws Exception if the arguments cannot be parsed or the sequence file cannot be written
     */
    public static void main(String[] args) throws Exception {
        final String argumentSpec;
        // Close the resource stream explicitly and fail with a readable message when it is missing.
        try (java.io.InputStream specStream = CrossrefImporter.class.getResourceAsStream(ARGUMENTS_RESOURCE)) {
            if (specStream == null) {
                throw new IllegalStateException("Missing classpath resource " + ARGUMENTS_RESOURCE);
            }
            argumentSpec = IOUtils.toString(specStream, java.nio.charset.StandardCharsets.UTF_8);
        }

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(argumentSpec);
        parser.parseArgument(args);
        System.out.println(parser.get("targetPath"));

        final String hdfsuri = parser.get("namenode");
        System.out.println(hdfsuri);
        final Path hdfswritepath = new Path(parser.get("targetPath"));

        // ====== Init HDFS File System Object
        final Configuration conf = new Configuration();
        // Set FileSystem URI
        conf.set("fs.defaultFS", hdfsuri);
        // Because of Maven
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        final ESClient client = new ESClient(ES_HOST, ES_INDEX);

        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(hdfswritepath),
                SequenceFile.Writer.keyClass(IntWritable.class),
                SequenceFile.Writer.valueClass(Text.class))) {

            int i = 0;
            long start = System.currentTimeMillis();
            final IntWritable key = new IntWritable(i);
            final Text value = new Text();

            while (client.hasNext()) {
                key.set(i++);
                value.set(client.next());
                writer.append(key, value);
                if (i % PROGRESS_STEP == 0) {
                    final long end = System.currentTimeMillis();
                    // Floating-point division: an integer division would drop the fractional seconds.
                    final float time = (end - start) / 1000f;
                    System.out.println(String.format("Imported %d records last 100000 imported in %f seconds", i, time));
                    start = System.currentTimeMillis();
                }
            }
            System.out.println(String.format("Imported %d records in total", i));
        }
    }
}

View File

@ -0,0 +1,103 @@
package eu.dnetlib.doiboost;
import com.jayway.jsonpath.JsonPath;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
/**
 * Iterator over the {@code _source.blob} values of an Elasticsearch index, read through the scroll API.
 *
 * <p>The constructor issues the initial search request; {@link #next()} requests the following
 * scroll page as soon as the current one has been consumed. Iteration ends when a page contains
 * no hits. Instances keep mutable state without synchronization.
 */
public class ESClient implements Iterator<String> {

    /** JSONPath selecting the blob of every hit in a search or scroll response. */
    final static String blobPath = "$.hits.hits[*]._source.blob";

    /** JSONPath selecting the scroll identifier of a search or scroll response. */
    final static String scrollIdPath = "$._scroll_id";

    /** Scroll identifier sent with the next scroll request. */
    String scrollId;

    /** Records of the current page that have not been returned yet. */
    List<String> buffer;

    final String esHost;

    final String esIndex;

    /**
     * Opens a scroll on the given index and loads the first page of records.
     *
     * @param esHost  host name of the Elasticsearch node (port 9200 is used)
     * @param esIndex name of the index to read
     * @throws IOException declared for callers; request failures surface as {@link RuntimeException}
     */
    public ESClient(final String esHost, final String esIndex) throws IOException {
        this.esHost = esHost;
        this.esIndex = esIndex;
        final String body = getResponse(String.format("http://%s:9200/%s/_search?scroll=1m", esHost, esIndex), "{\"size\":1000}");
        scrollId = getJPathString(scrollIdPath, body);
        buffer = getBlobs(body);
    }

    /**
     * Sends a POST request and returns the response body as a string.
     *
     * @param url  target URL
     * @param json JSON payload, or {@code null} to send no entity
     * @return the response body decoded as UTF-8
     * @throws RuntimeException if the request fails or the client cannot be closed
     */
    private String getResponse(final String url, final String json) {
        // Both the client and the response are closed here, so the connection is always released.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            final HttpPost httpPost = new HttpPost(url);
            if (json != null) {
                httpPost.setEntity(new StringEntity(json, java.nio.charset.StandardCharsets.UTF_8));
                httpPost.setHeader("Accept", "application/json");
                httpPost.setHeader("Content-type", "application/json");
            }
            try (CloseableHttpResponse response = client.execute(httpPost)) {
                return IOUtils.toString(response.getEntity().getContent(), java.nio.charset.StandardCharsets.UTF_8);
            }
        } catch (IOException | RuntimeException e) {
            throw new RuntimeException("Error on executing request " + url, e);
        }
    }

    /**
     * Evaluates a JSONPath expected to select a single string.
     *
     * @param jsonPath the JSONPath expression
     * @param json     the document to query
     * @return the selected string, {@code null} when the selected value is not a string,
     *         or the empty string when the evaluation fails
     */
    private String getJPathString(final String jsonPath, final String json) {
        try {
            final Object o = JsonPath.read(json, jsonPath);
            return (o instanceof String) ? (String) o : null;
        } catch (Exception e) {
            return "";
        }
    }

    /**
     * Extracts the blob of every hit contained in a search or scroll response.
     *
     * @param body the JSON response body
     * @return the list of blobs, empty when the page has no hits
     */
    private List<String> getBlobs(final String body) {
        final List<String> res = JsonPath.read(body, blobPath);
        return res;
    }

    @Override
    public boolean hasNext() {
        return (buffer != null && !buffer.isEmpty());
    }

    /**
     * Returns the next record and, when the current page is exhausted, fetches the following one.
     *
     * @return the next blob
     * @throws java.util.NoSuchElementException if no record is left
     * @throws IllegalStateException            if the scroll response cannot be parsed
     */
    @Override
    public String next() {
        if (!hasNext()) {
            throw new java.util.NoSuchElementException("No more records in index " + esIndex);
        }
        final String nextItem = buffer.remove(0);
        if (buffer.isEmpty()) {
            final String json_param = String.format("{\"scroll_id\":\"%s\",\"scroll\" : \"1m\"}", scrollId);
            final String body = getResponse(String.format("http://%s:9200/_search/scroll", esHost), json_param);
            try {
                buffer = getBlobs(body);
            } catch (RuntimeException e) {
                // Failing loudly: swallowing the error would end the iteration and yield a truncated dump.
                throw new IllegalStateException("Unable to parse scroll response: " + body, e);
            }
            // Always continue with the most recently received scroll id.
            final String latestScrollId = getJPathString(scrollIdPath, body);
            if (latestScrollId != null && !latestScrollId.isEmpty()) {
                scrollId = latestScrollId;
            }
        }
        return nextItem;
    }
}

View File

@ -0,0 +1,16 @@
package eu.dnetlib.doiboost
/**
 * Immutable record describing a journal.
 *
 * NOTE(review): the field names and their order appear to mirror the ten columns of the
 * tab-delimited mag_Journals.txt.gz file that the commented-out SparkDownloadContentFromCrossref
 * job reads by position; confirm before renaming or reordering fields.
 */
case class Journal(
// numeric identifier of the journal
JournalId: Long,
// NOTE(review): ranking value; its scale and direction are not visible here, confirm against the data source
Rank: Int,
NormalizedName: String,
DisplayName: String,
Issn: String,
Publisher: String,
Webpage: String,
// presumably the number of papers published in the journal; confirm
PaperCount: Long,
// presumably the number of citations received by the journal; confirm
CitationCount: Long,
// kept as a string; NOTE(review): the date format is not established by this file
CreatedDate: String
)

View File

@ -0,0 +1,49 @@
package eu.dnetlib.doiboost
//import org.apache.spark.SparkConf
//import org.apache.spark.sql.{Dataset, Encoders, Row, SparkSession}
//
//object SparkDownloadContentFromCrossref {
//
//
// def main(args: Array[String]): Unit = {
//
//
// val conf: SparkConf = new SparkConf().setAppName("DownloadContentFromCrossref").setMaster("local[*]")
//
// val spark = SparkSession.builder().config(conf).getOrCreate()
//
//
// val sc = spark.sparkContext
// import spark.implicits._
// spark.read.option("header", "false")
// .option("delimiter", "\t")
// .csv("/Users/sandro/Downloads/doiboost/mag_Journals.txt.gz")
//
//
// val d = spark.read.option("header", "false")
// .option("delimiter", "\t")
// .csv("/Users/sandro/Downloads/doiboost/mag_Journals.txt.gz")
// .map(f =>
// Journal( f.getAs[String](0).toLong, f.getAs[String](1).toInt, f.getAs[String](2),
// f.getAs[String](3), f.getAs[String](4), f.getAs[String](5), f.getAs[String](6),
// f.getAs[String](7).toLong, f.getAs[String](8).toLong, f.getAs[String](9)
// ))
//
// d.show()
//
// d.printSchema()
//
//
//
//
//
//
//
//
// }
//
//
//}
//

View File

@ -0,0 +1,18 @@
<configuration>
<!-- Default properties for the Oozie workflow that imports Crossref into HDFS. -->
<!-- jobTracker and nameNode are the values substituted for ${jobTracker} and ${nameNode} in the workflow. -->
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<!-- Share library made available to java actions. -->
<property>
<name>oozie.action.sharelib.for.java</name>
<value>spark2</value>
</property>
<!-- NOTE(review): presumably gives the jars shipped with the workflow precedence over the cluster ones in the launcher; confirm. -->
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,39 @@
<workflow-app name="import Crossref from index into HDFS" xmlns="uri:oozie:workflow:0.5">
<!-- Two-step workflow: reset the working directory, then run the Crossref importer as a java action. -->
<parameters>
<property>
<name>workingPath</name>
<description>the working dir base path</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<!-- Terminal node reached when any action fails; it reports the error of the last failed node. -->
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- WARNING: deletes everything under ${workingPath} before recreating the input/crossref directory. -->
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}'/>
<mkdir path='${workingPath}/input/crossref'/>
</fs>
<ok to="ImportCrossRef"/>
<error to="Kill"/>
</action>
<!-- Runs CrossrefImporter: -t is the sequence file to write, -n is the name node URI. -->
<action name="ImportCrossRef">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.CrossrefImporter</main-class>
<arg>-t</arg><arg>${workingPath}/input/crossref/index_dump</arg>
<arg>-n</arg><arg>${nameNode}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,4 @@
[
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the sequential file to write", "paramRequired": true},
{"paramName":"n", "paramLongName":"namenode", "paramDescription": "the URI of the HDFS namenode", "paramRequired": true}
]

View File

@ -0,0 +1,61 @@
package eu.dnetlib.doiboost;
import com.jayway.jsonpath.JsonPath;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
import java.util.List;
/**
 * Tests for the Crossref import: one offline check of the JSONPath used to extract records
 * and two ignored tests that need a reachable Elasticsearch node.
 */
public class DoiBoostTest {

    /** Name of the sample Elasticsearch response, resolved relative to this class. */
    private static final String RESPONSE_FIXTURE = "response.json";

    /** Runs the whole importer against the local file system; ignored because it needs the remote index. */
    @Test
    @Ignore
    public void test() throws Exception {
        //SparkDownloadContentFromCrossref.main(null);
        CrossrefImporter.main(new String[]{
                "-n", "file:///tmp",
                "-t", "file:///tmp/p.seq",
        });
    }

    /** Checks that the blob JSONPath can be evaluated on the sample response. */
    @Test
    public void testPath() throws Exception {
        final String json;
        try (java.io.InputStream fixture = getClass().getResourceAsStream(RESPONSE_FIXTURE)) {
            // A missing fixture would otherwise surface as an opaque NullPointerException inside IOUtils.
            Assert.assertNotNull("fixture " + RESPONSE_FIXTURE + " not found on the test classpath", fixture);
            json = IOUtils.toString(fixture, java.nio.charset.StandardCharsets.UTF_8);
        }
        final List<String> res = JsonPath.read(json, "$.hits.hits[*]._source.blob");
        Assert.assertNotNull(res);
        System.out.println(res.size());
    }

    /** Scrolls through the remote index and prints the throughput; ignored because it needs the remote index. */
    @Test
    @Ignore
    public void testParseResponse() throws IOException {
        long start = System.currentTimeMillis();
        final ESClient client = new ESClient("ip-90-147-167-25.ct1.garrservices.it", "crossref");
        int i = 0;
        while (client.hasNext()) {
            Assert.assertNotNull(client.next());
            i++;
            if (i % 1000 == 0) {
                final long end = System.currentTimeMillis();
                // Floating-point division: batches faster than one second would otherwise print "0s".
                System.out.println("Vel 1000 records in " + ((end - start) / 1000.0) + "s");
                start = System.currentTimeMillis();
            }
            if (i > 1000000) {
                break;
            }
        }
    }
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -19,6 +19,7 @@
<module>dhp-graph-mapper</module> <module>dhp-graph-mapper</module>
<module>dhp-dedup</module> <module>dhp-dedup</module>
<module>dhp-graph-provision</module> <module>dhp-graph-provision</module>
<module>dhp-doiboost</module>
</modules> </modules>
<pluginRepositories> <pluginRepositories>

736
pom.xml
View File

@ -1,178 +1,178 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" <project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId> <artifactId>dhp</artifactId>
<version>1.1.6-SNAPSHOT</version> <version>1.1.6-SNAPSHOT</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<url>http://www.d-net.research-infrastructures.eu</url> <url>http://www.d-net.research-infrastructures.eu</url>
<licenses> <licenses>
<license> <license>
<name>The Apache Software License, Version 2.0</name> <name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution> <distribution>repo</distribution>
<comments>A business-friendly OSS license</comments> <comments>A business-friendly OSS license</comments>
</license> </license>
</licenses> </licenses>
<modules> <modules>
<module>dhp-build</module> <module>dhp-build</module>
<module>dhp-schemas</module> <module>dhp-schemas</module>
<module>dhp-common</module> <module>dhp-common</module>
<module>dhp-workflows</module> <module>dhp-workflows</module>
</modules> </modules>
<issueManagement> <issueManagement>
<system>Redmine</system> <system>Redmine</system>
<url>https://issue.openaire.research-infrastructures.eu/projects/openaire</url> <url>https://issue.openaire.research-infrastructures.eu/projects/openaire</url>
</issueManagement> </issueManagement>
<ciManagement> <ciManagement>
<system>jenkins</system> <system>jenkins</system>
<url>https://jenkins-dnet.d4science.org/</url> <url>https://jenkins-dnet.d4science.org/</url>
</ciManagement> </ciManagement>
<scm> <scm>
<connection>scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git</connection> <connection>scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git</connection>
<developerConnection>scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git</developerConnection> <developerConnection>scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git</developerConnection>
<url>https://code-repo.d4science.org/D-Net/dnet-hadoop/</url> <url>https://code-repo.d4science.org/D-Net/dnet-hadoop/</url>
<tag>HEAD</tag> <tag>HEAD</tag>
</scm> </scm>
<pluginRepositories> <pluginRepositories>
</pluginRepositories> </pluginRepositories>
<repositories> <repositories>
<repository> <repository>
<id>dnet45-releases</id> <id>dnet45-releases</id>
<name>D-Net 45 releases</name> <name>D-Net 45 releases</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url> <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
<layout>default</layout> <layout>default</layout>
<snapshots> <snapshots>
<enabled>false</enabled> <enabled>false</enabled>
</snapshots> </snapshots>
<releases> <releases>
<enabled>true</enabled> <enabled>true</enabled>
</releases> </releases>
</repository> </repository>
<repository> <repository>
<id>cloudera</id> <id>cloudera</id>
<name>Cloudera Repository</name> <name>Cloudera Repository</name>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url> <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
<releases> <releases>
<enabled>true</enabled> <enabled>true</enabled>
</releases> </releases>
<snapshots> <snapshots>
<enabled>false</enabled> <enabled>false</enabled>
</snapshots> </snapshots>
</repository> </repository>
</repositories> </repositories>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>junit</groupId> <groupId>junit</groupId>
<artifactId>junit</artifactId> <artifactId>junit</artifactId>
<version>${junit.version}</version> <version>${junit.version}</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.mockito</groupId> <groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId> <artifactId>mockito-core</artifactId>
<version>2.7.22</version> <version>2.7.22</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
</dependencies> </dependencies>
<dependencyManagement> <dependencyManagement>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId> <artifactId>hadoop-hdfs</artifactId>
<version>${dhp.hadoop.version}</version> <version>${dhp.hadoop.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId> <artifactId>hadoop-common</artifactId>
<version>${dhp.hadoop.version}</version> <version>${dhp.hadoop.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId> <artifactId>hadoop-client</artifactId>
<version>${dhp.hadoop.version}</version> <version>${dhp.hadoop.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-distcp</artifactId> <artifactId>hadoop-distcp</artifactId>
<version>${dhp.hadoop.version}</version> <version>${dhp.hadoop.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_2.11</artifactId>
<version>${dhp.spark.version}</version> <version>${dhp.spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_2.11</artifactId>
<version>${dhp.spark.version}</version> <version>${dhp.spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_2.11</artifactId> <artifactId>spark-graphx_2.11</artifactId>
<version>${dhp.spark.version}</version> <version>${dhp.spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId> <artifactId>jcl-over-slf4j</artifactId>
<version>1.7.25</version> <version>1.7.25</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId> <artifactId>commons-lang3</artifactId>
<version>${dhp.commons.lang.version}</version> <version>${dhp.commons.lang.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.google.guava</groupId> <groupId>com.google.guava</groupId>
<artifactId>guava</artifactId> <artifactId>guava</artifactId>
<version>${dhp.guava.version}</version> <version>${dhp.guava.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-codec</groupId> <groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId> <artifactId>commons-codec</artifactId>
<version>1.9</version> <version>1.9</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-io</groupId> <groupId>commons-io</groupId>
<artifactId>commons-io</artifactId> <artifactId>commons-io</artifactId>
<version>2.4</version> <version>2.4</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-cli</groupId> <groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId> <artifactId>commons-cli</artifactId>
<version>1.2</version> <version>1.2</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>net.sf.saxon</groupId> <groupId>net.sf.saxon</groupId>
@ -180,25 +180,25 @@
<version>9.9.1-6</version> <version>9.9.1-6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>dom4j</groupId> <groupId>dom4j</groupId>
<artifactId>dom4j</artifactId> <artifactId>dom4j</artifactId>
<version>1.6.1</version> <version>1.6.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>xml-apis</groupId> <groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId> <artifactId>xml-apis</artifactId>
<version>1.4.01</version> <version>1.4.01</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>jaxen</groupId> <groupId>jaxen</groupId>
<artifactId>jaxen</artifactId> <artifactId>jaxen</artifactId>
<version>1.1.6</version> <version>1.1.6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.mycila.xmltool</groupId> <groupId>com.mycila.xmltool</groupId>
<artifactId>xmltool</artifactId> <artifactId>xmltool</artifactId>
<version>3.3</version> <version>3.3</version>
@ -249,47 +249,47 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>net.schmizz</groupId> <groupId>net.schmizz</groupId>
<artifactId>sshj</artifactId> <artifactId>sshj</artifactId>
<version>0.10.0</version> <version>0.10.0</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.core</groupId> <groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId> <artifactId>jackson-core</artifactId>
<version>${dhp.jackson.version}</version> <version>${dhp.jackson.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.core</groupId> <groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId> <artifactId>jackson-annotations</artifactId>
<version>${dhp.jackson.version}</version> <version>${dhp.jackson.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.core</groupId> <groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId> <artifactId>jackson-databind</artifactId>
<version>${dhp.jackson.version}</version> <version>${dhp.jackson.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>eu.dnetlib</groupId> <groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-common</artifactId> <artifactId>dnet-actionmanager-common</artifactId>
<version>6.0.5</version> <version>6.0.5</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>eu.dnetlib</groupId> <groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaire-data-protos</artifactId> <artifactId>dnet-openaire-data-protos</artifactId>
<version>3.9.8-proto250</version> <version>3.9.8-proto250</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>eu.dnetlib</groupId> <groupId>eu.dnetlib</groupId>
<artifactId>dnet-pace-core</artifactId> <artifactId>dnet-pace-core</artifactId>
<version>4.0.0</version> <version>4.0.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>eu.dnetlib</groupId> <groupId>eu.dnetlib</groupId>
<artifactId>cnr-rmi-api</artifactId> <artifactId>cnr-rmi-api</artifactId>
@ -301,12 +301,12 @@
<artifactId>cxf-rt-transports-http</artifactId> <artifactId>cxf-rt-transports-http</artifactId>
<version>3.1.5</version> <version>3.1.5</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>javax.persistence</groupId> <groupId>javax.persistence</groupId>
<artifactId>javax.persistence-api</artifactId> <artifactId>javax.persistence-api</artifactId>
<version>2.2</version> <version>2.2</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.rabbitmq</groupId> <groupId>com.rabbitmq</groupId>
@ -339,174 +339,174 @@
<version>4.0</version> <version>4.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.oozie</groupId> <groupId>org.apache.oozie</groupId>
<artifactId>oozie-client</artifactId> <artifactId>oozie-client</artifactId>
<version>${dhp.oozie.version}</version> <version>${dhp.oozie.version}</version>
<scope>provided</scope> <scope>provided</scope>
<exclusions> <exclusions>
<!-- conflicts --> <!-- conflicts -->
<exclusion> <exclusion>
<artifactId>slf4j-simple</artifactId> <artifactId>slf4j-simple</artifactId>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
</exclusion> </exclusion>
</exclusions> </exclusions>
</dependency> </dependency>
</dependencies> </dependencies>
</dependencyManagement> </dependencyManagement>
<build> <build>
<directory>target</directory> <directory>target</directory>
<outputDirectory>target/classes</outputDirectory> <outputDirectory>target/classes</outputDirectory>
<finalName>${project.artifactId}-${project.version}</finalName> <finalName>${project.artifactId}-${project.version}</finalName>
<testOutputDirectory>target/test-classes</testOutputDirectory> <testOutputDirectory>target/test-classes</testOutputDirectory>
<pluginManagement> <pluginManagement>
<plugins> <plugins>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId> <artifactId>maven-compiler-plugin</artifactId>
<version>${maven.compiler.plugin.version}</version> <version>${maven.compiler.plugin.version}</version>
<configuration> <configuration>
<source>1.8</source> <source>1.8</source>
<target>1.8</target> <target>1.8</target>
<encoding>${project.build.sourceEncoding}</encoding> <encoding>${project.build.sourceEncoding}</encoding>
</configuration> </configuration>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId> <artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version> <version>3.0.2</version>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId> <artifactId>maven-source-plugin</artifactId>
<version>3.0.1</version> <version>3.0.1</version>
<executions> <executions>
<execution> <execution>
<id>attach-sources</id> <id>attach-sources</id>
<phase>verify</phase> <phase>verify</phase>
<goals> <goals>
<goal>jar-no-fork</goal> <goal>jar-no-fork</goal>
</goals> </goals>
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId> <artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version> <version>2.19.1</version>
<configuration> <configuration>
<redirectTestOutputToFile>true</redirectTestOutputToFile> <redirectTestOutputToFile>true</redirectTestOutputToFile>
</configuration> </configuration>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId> <artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version> <version>2.10.4</version>
<configuration> <configuration>
<detectLinks>true</detectLinks> <detectLinks>true</detectLinks>
</configuration> </configuration>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId> <artifactId>maven-dependency-plugin</artifactId>
<version>3.0.0</version> <version>3.0.0</version>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.codehaus.mojo</groupId> <groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId> <artifactId>build-helper-maven-plugin</artifactId>
<version>1.12</version> <version>1.12</version>
</plugin> </plugin>
</plugins> </plugins>
</pluginManagement> </pluginManagement>
<plugins> <plugins>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId> <artifactId>maven-release-plugin</artifactId>
<version>2.5.3</version> <version>2.5.3</version>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.jacoco</groupId> <groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId> <artifactId>jacoco-maven-plugin</artifactId>
<version>0.7.9</version> <version>0.7.9</version>
<configuration> <configuration>
<excludes> <excludes>
<exclude>**/schemas/*</exclude> <exclude>**/schemas/*</exclude>
<exclude>**/com/cloudera/**/*</exclude> <exclude>**/com/cloudera/**/*</exclude>
<exclude>**/org/apache/avro/io/**/*</exclude> <exclude>**/org/apache/avro/io/**/*</exclude>
</excludes> </excludes>
</configuration> </configuration>
<executions> <executions>
<execution> <execution>
<id>default-prepare-agent</id> <id>default-prepare-agent</id>
<goals> <goals>
<goal>prepare-agent</goal> <goal>prepare-agent</goal>
</goals> </goals>
</execution> </execution>
<execution> <execution>
<id>default-report</id> <id>default-report</id>
<phase>prepare-package</phase> <phase>prepare-package</phase>
<goals> <goals>
<goal>report</goal> <goal>report</goal>
</goals> </goals>
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
</plugins> </plugins>
<extensions> <extensions>
<extension> <extension>
<groupId>org.apache.maven.wagon</groupId> <groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-ssh</artifactId> <artifactId>wagon-ssh</artifactId>
<version>2.10</version> <version>2.10</version>
</extension> </extension>
</extensions> </extensions>
</build> </build>
<distributionManagement> <distributionManagement>
<snapshotRepository> <snapshotRepository>
<id>dnet45-snapshots</id> <id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name> <name>DNet45 Snapshots</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url> <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout> <layout>default</layout>
</snapshotRepository> </snapshotRepository>
<repository> <repository>
<id>dnet45-releases</id> <id>dnet45-releases</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url> <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
</repository> </repository>
</distributionManagement> </distributionManagement>
<reporting> <reporting>
<plugins> <plugins>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId> <artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version> <version>2.10.4</version>
<configuration> <configuration>
<detectLinks>true</detectLinks> <detectLinks>true</detectLinks>
</configuration> </configuration>
</plugin> </plugin>
</plugins> </plugins>
</reporting> </reporting>
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version> <maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version>
<maven.failsave.plugin.version>2.22.2</maven.failsave.plugin.version> <maven.failsave.plugin.version>2.22.2</maven.failsave.plugin.version>
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version> <dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
<dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version> <dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version> <dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>
<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version> <dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
<dhp.jackson.version>2.9.6</dhp.jackson.version> <dhp.jackson.version>2.9.6</dhp.jackson.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version> <dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<dhp.guava.version>11.0.2</dhp.guava.version> <dhp.guava.version>11.0.2</dhp.guava.version>
<scala.version>2.11.12</scala.version> <scala.version>2.11.12</scala.version>
<junit.version>4.12</junit.version> <junit.version>4.12</junit.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version> <mongodb.driver.version>3.4.2</mongodb.driver.version>
</properties> </properties>
</project> </project>