Build with Spark 3.4 (only dedup and its dependencies tested)

Giambattista Bloisi 2023-07-10 15:54:48 +02:00
parent 861c368e65
commit d80f12da06
20 changed files with 234 additions and 73 deletions

View File

@@ -52,6 +52,8 @@
     </execution>
   </executions>
   <configuration>
+    <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
+    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
     <scalaVersion>${scala.version}</scalaVersion>
   </configuration>
 </plugin>
@@ -76,11 +78,11 @@
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
@@ -159,7 +161,7 @@
 <dependency>
   <groupId>eu.dnetlib.dhp</groupId>
-  <artifactId>dhp-schemas</artifactId>
+  <artifactId>dhp-schemas_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>

View File

@@ -20,7 +20,7 @@
 <plugin>
   <groupId>net.alchim31.maven</groupId>
   <artifactId>scala-maven-plugin</artifactId>
-  <version>4.0.1</version>
+  <version>${net.alchim31.maven.version}</version>
   <executions>
     <execution>
       <id>scala-compile-first</id>
@@ -39,8 +39,9 @@
     </execution>
   </executions>
   <configuration>
+    <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
+    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
     <scalaVersion>${scala.version}</scalaVersion>
+    <addScalacArgs>-target:jvm-1.8</addScalacArgs>
   </configuration>
 </plugin>
 </plugins>
@@ -98,11 +99,11 @@
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 </dependencies>

View File

@@ -11,12 +11,12 @@
 <dependencies>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>

View File

@@ -38,6 +38,8 @@
     </execution>
   </executions>
   <configuration>
+    <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
+    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
     <scalaVersion>${scala.version}</scalaVersion>
   </configuration>
 </plugin>
@@ -54,11 +56,11 @@
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>

View File

@@ -16,11 +16,11 @@
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 </dependencies>

View File

@@ -18,11 +18,11 @@
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.elasticsearch</groupId>

View File

@@ -13,7 +13,7 @@
 <plugin>
   <groupId>net.alchim31.maven</groupId>
   <artifactId>scala-maven-plugin</artifactId>
-  <version>4.0.1</version>
+  <version>${net.alchim31.maven.version}</version>
   <executions>
     <execution>
       <id>scala-compile-first</id>
@@ -32,8 +32,9 @@
     </execution>
   </executions>
   <configuration>
+    <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
+    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
     <scalaVersion>${scala.version}</scalaVersion>
+    <addScalacArgs>-target:jvm-1.8</addScalacArgs>
   </configuration>
 </plugin>
 </plugins>
@@ -58,38 +59,67 @@
   <groupId>eu.dnetlib.dhp</groupId>
   <artifactId>dhp-common</artifactId>
   <version>${project.version}</version>
+  <exclusions>
+    <exclusion>
+      <artifactId>log4j</artifactId>
+      <groupId>log4j</groupId>
+    </exclusion>
+    <exclusion>
+      <artifactId>annotations</artifactId>
+      <groupId>org.jetbrains</groupId>
+    </exclusion>
+    <exclusion>
+      <artifactId>slf4j-api</artifactId>
+      <groupId>org.slf4j</groupId>
+    </exclusion>
+  </exclusions>
 </dependency>
 <dependency>
   <groupId>eu.dnetlib.dhp</groupId>
   <artifactId>dhp-pace-core</artifactId>
   <version>${project.version}</version>
+  <exclusions>
+    <exclusion>
+      <artifactId>jsr305</artifactId>
+      <groupId>com.google.code.findbugs</groupId>
+    </exclusion>
+    <exclusion>
+      <artifactId>javassist</artifactId>
+      <groupId>org.javassist</groupId>
+    </exclusion>
+  </exclusions>
+</dependency>
+<dependency>
+  <groupId>org.apache.commons</groupId>
+  <artifactId>commons-lang3</artifactId>
 </dependency>
 <dependency>
   <groupId>org.scala-lang.modules</groupId>
-  <artifactId>scala-java8-compat_2.11</artifactId>
+  <artifactId>scala-java8-compat_${scala.binary.version}</artifactId>
   <version>1.0.2</version>
 </dependency>
 <dependency>
   <groupId>org.scala-lang.modules</groupId>
-  <artifactId>scala-collection-compat_2.11</artifactId>
-  <version>2.8.0</version>
+  <artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
+  <version>2.11.0</version>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-graphx_2.11</artifactId>
+  <artifactId>spark-graphx_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
@@ -107,12 +137,6 @@
   <artifactId>jaxen</artifactId>
 </dependency>
-<dependency>
-  <groupId>com.influxdb</groupId>
-  <artifactId>influxdb-client-java</artifactId>
-  <version>3.1.0</version>
-</dependency>
 <dependency>
   <groupId>com.fasterxml.jackson.core</groupId>
   <artifactId>jackson-databind</artifactId>

View File

@@ -0,0 +1,118 @@
package eu.dnetlib.dhp.oa.dedup;

import java.io.IOException;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.application.dedup.log.DedupLogModel;
import eu.dnetlib.dhp.application.dedup.log.DedupLogWriter;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.SparkDedupConfig;

public class SparkSimRelsAnalytics extends AbstractSparkAction {

    private static final Logger log = LoggerFactory.getLogger(SparkSimRelsAnalytics.class);

    public SparkSimRelsAnalytics(ArgumentApplicationParser parser, SparkSession spark) {
        super(parser, spark);
        spark.sparkContext().setLogLevel("WARN");
    }

    public static void main(String[] args) throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(
            IOUtils
                .toString(
                    SparkSimRelsAnalytics.class
                        .getResourceAsStream(
                            "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json")));
        parser.parseArgument(args);

        SparkConf conf = new SparkConf();
        new SparkSimRelsAnalytics(parser, getSparkSession(conf))
            .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl")));
    }

    @Override
    public void run(ISLookUpService isLookUpService)
        throws DocumentException, IOException, ISLookUpException, SAXException {

        // read oozie parameters
        final String graphBasePath = parser.get("graphBasePath");
        final String isLookUpUrl = parser.get("isLookUpUrl");
        final String actionSetId = parser.get("actionSetId");
        final String workingPath = parser.get("workingPath");
        final int numPartitions = Optional
            .ofNullable(parser.get("numPartitions"))
            .map(Integer::valueOf)
            .orElse(NUM_PARTITIONS);

        log.info("numPartitions: '{}'", numPartitions);
        log.info("graphBasePath: '{}'", graphBasePath);
        log.info("isLookUpUrl:   '{}'", isLookUpUrl);
        log.info("actionSetId:   '{}'", actionSetId);
        log.info("workingPath:   '{}'", workingPath);

        final String dfLogPath = parser.get("dataframeLog");
        final String runTag = Optional.ofNullable(parser.get("runTAG")).orElse("UNKNOWN");

        // for each dedup configuration
        for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) {
            final long start = System.currentTimeMillis();

            final String entity = dedupConf.getWf().getEntityType();
            final String subEntity = dedupConf.getWf().getSubEntityValue();
            log.info("Creating simrels for: '{}'", subEntity);

            final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity);
            removeOutputDir(spark, outputPath);

            JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

            SparkDedupConfig sparkConfig = new SparkDedupConfig(dedupConf, numPartitions);
            spark.udf().register("collect_sort_slice", sparkConfig.collectSortSliceUDAF());

            Dataset<?> simRels = spark
                .read()
                .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity))
                .transform(sparkConfig.modelExtractor()) // extract fields from the input json column according to the model definition
                .transform(sparkConfig.generateClustersWithWindows()) // generate <key, block> pairs according to filters, clusters, and model definition
                .transform(sparkConfig.processClusters()) // process blocks and emit <from, to> pairs of found similarities
                .map(
                    (MapFunction<Row, Relation>) t -> DedupUtility
                        .createSimRel(t.getStruct(0).getString(0), t.getStruct(0).getString(1), entity),
                    Encoders.bean(Relation.class));

            saveParquet(simRels, outputPath, SaveMode.Overwrite);

            final long end = System.currentTimeMillis();

            if (StringUtils.isNotBlank(dfLogPath)) {
                final DedupLogModel model = new DedupLogModel(runTag, dedupConf.toString(), subEntity, start, end,
                    end - start);
                new DedupLogWriter(dfLogPath).appendLog(model, spark);
            }
        }
    }
}
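
Note: collect_sort_slice is registered through the untyped UDF path, which Spark 3 rejects unless spark.sql.legacy.allowUntypedScalaUDF=true is set; that is why the flag appears further down in both the oozie workflow spark-opts and SparkDedupTest. A minimal sketch of the required session setup (not part of the commit; the wrapper class and local master are illustrative only):

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.SparkDedupConfig;

// Sketch only: build a session with the legacy flag enabled, then register
// the collect_sort_slice UDAF the same way SparkSimRelsAnalytics does above.
public class UntypedUdfSessionSketch {

    public static SparkSession sessionFor(DedupConfig dedupConf, int numPartitions) {
        SparkConf conf = new SparkConf()
            .set("spark.sql.legacy.allowUntypedScalaUDF", "true"); // without this, Spark 3 fails the registration
        SparkSession spark = SparkSession
            .builder()
            .config(conf)
            .master("local[*]") // illustrative; the workflow runs on yarn
            .getOrCreate();
        spark
            .udf()
            .register("collect_sort_slice", new SparkDedupConfig(dedupConf, numPartitions).collectSortSliceUDAF());
        return spark;
    }
}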

View File

@@ -13,6 +13,6 @@
 </property>
 <property>
   <name>oozie.action.sharelib.for.spark</name>
-  <value>spark2</value>
+  <value>spark342</value>
 </property>
 </configuration>

View File

@@ -126,15 +126,25 @@
   --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
   --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
   --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-  --conf spark.sql.shuffle.partitions=15000
+  --conf spark.sql.shuffle.partitions=5000
+  --conf spark.driver.extraJavaOptions="-Xss256k"
+  --conf spark.executor.extraJavaOptions="-Dlog4j.configuration=spark-log4j.properties -Xss256k"
+  --conf spark.extraListeners=
+  --conf spark.sql.queryExecutionListeners=
+  --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.minExecutors=100 --conf spark.dynamicAllocation.shuffleTracking.enabled=true
+  --conf spark.network.io.preferDirectBufs=true --conf spark.memory.fraction=0.4 --conf spark.sql.adaptive.coalescePartitions.minPartitionNum=5000
+  --conf spark.shuffle.useOldFetchProtocol=true --conf spark.shuffle.service.enabled=true --conf spark.eventLog.enabled=true
+  --conf spark.executor.heartbeatInterval=60s
+  --conf spark.network.timeout=640s
+  --conf spark.sql.legacy.allowUntypedScalaUDF=true
 </spark-opts>
 <arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
 <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
 <arg>--actionSetId</arg><arg>${actionSetId}</arg>
 <arg>--workingPath</arg><arg>${workingPath}</arg>
-<arg>--numPartitions</arg><arg>15000</arg>
+<arg>--numPartitions</arg><arg>5000</arg>
 </spark>
-<ok to="WhitelistSimRels"/>
+<ok to="End"/>
 <error to="Kill"/>
 </action>
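
Most of the added spark-opts are Spark 3 migration settings: spark.shuffle.useOldFetchProtocol=true keeps shuffle fetches compatible with an older external shuffle service, spark.dynamicAllocation.shuffleTracking.enabled=true allows dynamic allocation without relying solely on that service, and spark.sql.shuffle.partitions drops from 15000 to 5000 in step with --numPartitions. A sketch of the same options as a programmatic SparkConf, e.g. for running the job outside oozie (class name is illustrative; values mirror the spark-opts above):

import org.apache.spark.SparkConf;

// Sketch only: the workflow's Spark 3 migration options expressed in code.
public class DedupMigrationConfSketch {

    public static SparkConf migrationConf() {
        return new SparkConf()
            .set("spark.sql.shuffle.partitions", "5000") // aligned with --numPartitions
            .set("spark.dynamicAllocation.enabled", "true")
            .set("spark.dynamicAllocation.minExecutors", "100")
            .set("spark.dynamicAllocation.shuffleTracking.enabled", "true")
            .set("spark.shuffle.useOldFetchProtocol", "true") // stay compatible with a pre-3.0 shuffle service
            .set("spark.shuffle.service.enabled", "true")
            .set("spark.network.timeout", "640s")
            .set("spark.sql.legacy.allowUntypedScalaUDF", "true"); // see SparkSimRelsAnalytics above
    }
}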

View File

@@ -95,6 +95,7 @@ public class SparkDedupTest implements Serializable {
   final SparkConf conf = new SparkConf();
   conf.set("spark.sql.shuffle.partitions", "200");
+  conf.set("spark.sql.legacy.allowUntypedScalaUDF", "true");
   spark = SparkSession
     .builder()
     .appName(SparkDedupTest.class.getSimpleName())

View File

@@ -33,6 +33,8 @@
     </execution>
   </executions>
   <configuration>
+    <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
+    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
     <scalaVersion>${scala.version}</scalaVersion>
   </configuration>
 </plugin>
@@ -70,12 +72,12 @@
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>

View File

@@ -12,11 +12,11 @@
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
@@ -27,7 +27,7 @@
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-hive_2.11</artifactId>
+  <artifactId>spark-hive_${scala.binary.version}</artifactId>
   <scope>test</scope>
 </dependency>

View File

@@ -14,7 +14,7 @@
 <plugin>
   <groupId>net.alchim31.maven</groupId>
   <artifactId>scala-maven-plugin</artifactId>
-  <version>4.0.1</version>
+  <version>${net.alchim31.maven.version}</version>
   <executions>
     <execution>
       <id>scala-compile-first</id>
@@ -37,6 +37,8 @@
       <arg>-Xmax-classfile-name</arg>
       <arg>200</arg>
     </args>
+    <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
+    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
     <scalaVersion>${scala.version}</scalaVersion>
   </configuration>
 </plugin>
@@ -64,15 +66,15 @@
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-hive_2.11</artifactId>
+  <artifactId>spark-hive_${scala.binary.version}</artifactId>
   <scope>test</scope>
   <exclusions>
     <exclusion>
@@ -125,7 +127,7 @@
 </dependency>
 <dependency>
   <groupId>org.json4s</groupId>
-  <artifactId>json4s-jackson_2.11</artifactId>
+  <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
 </dependency>

View File

@@ -14,7 +14,7 @@
 <plugin>
   <groupId>net.alchim31.maven</groupId>
   <artifactId>scala-maven-plugin</artifactId>
-  <version>4.0.1</version>
+  <version>${net.alchim31.maven.version}</version>
   <executions>
     <execution>
       <id>scala-compile-first</id>
@@ -37,6 +37,8 @@
       <arg>-Xmax-classfile-name</arg>
       <arg>200</arg>
     </args>
+    <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
+    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
     <scalaVersion>${scala.version}</scalaVersion>
   </configuration>
 </plugin>
@@ -48,11 +50,11 @@
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>com.jayway.jsonpath</groupId>

View File

@@ -10,11 +10,11 @@
 <dependencies>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 </dependencies>
 <build>

View File

@@ -10,11 +10,11 @@
 <dependencies>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 </dependencies>
 <build>

View File

@@ -46,13 +46,11 @@
 <dependencies>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
-  <version>2.2.0</version>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
-  <version>2.4.5</version>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>com.googlecode.json-simple</groupId>

View File

@@ -46,13 +46,11 @@
 <dependencies>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
-  <version>2.2.0</version>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
-  <version>2.4.5</version>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
 </dependency>
 <dependency>
   <groupId>com.googlecode.json-simple</groupId>

pom.xml
View File

@@ -142,7 +142,7 @@
 <dependencies>
 <dependency>
   <groupId>eu.dnetlib.dhp</groupId>
-  <artifactId>dhp-schemas</artifactId>
+  <artifactId>dhp-schemas_${scala.binary.version}</artifactId>
   <version>${dhp-schemas.version}</version>
 </dependency>
 <dependency>
@@ -171,25 +171,25 @@
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_2.11</artifactId>
+  <artifactId>spark-core_${scala.binary.version}</artifactId>
   <version>${dhp.spark.version}</version>
   <scope>provided</scope>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-sql_2.11</artifactId>
+  <artifactId>spark-sql_${scala.binary.version}</artifactId>
   <version>${dhp.spark.version}</version>
   <scope>provided</scope>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-graphx_2.11</artifactId>
+  <artifactId>spark-graphx_${scala.binary.version}</artifactId>
   <version>${dhp.spark.version}</version>
   <scope>provided</scope>
 </dependency>
 <dependency>
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-hive_2.11</artifactId>
+  <artifactId>spark-hive_${scala.binary.version}</artifactId>
   <version>${dhp.spark.version}</version>
   <scope>test</scope>
 </dependency>
@@ -295,7 +295,7 @@
 <dependency>
   <groupId>com.lucidworks.spark</groupId>
   <artifactId>spark-solr</artifactId>
-  <version>3.6.0</version>
+  <version>4.0.2</version>
   <exclusions>
     <exclusion>
       <artifactId>*</artifactId>
@@ -523,7 +523,7 @@
 <dependency>
   <groupId>org.json4s</groupId>
-  <artifactId>json4s-jackson_2.11</artifactId>
+  <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
   <version>${json4s.version}</version>
 </dependency>
@@ -699,7 +699,7 @@
 </plugin>
 <plugin>
   <groupId>org.antipathy</groupId>
-  <artifactId>mvn-scalafmt_2.11</artifactId>
+  <artifactId>mvn-scalafmt_${scala.binary.version}</artifactId>
   <version>1.0.1640073709.733712b</version>
   <dependencies>
     <dependency>
@@ -756,7 +756,7 @@
 </plugin>
 <plugin>
   <groupId>org.antipathy</groupId>
-  <artifactId>mvn-scalafmt_2.11</artifactId>
+  <artifactId>mvn-scalafmt_${scala.binary.version}</artifactId>
   <configuration>
     <configLocation>https://code-repo.d4science.org/D-Net/dnet-hadoop/raw/branch/beta/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf</configLocation>
     <skipTestSources>false</skipTestSources>
@@ -865,17 +865,18 @@
 <dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
 <dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
 <dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>
-<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
-<dhp.jackson.version>2.9.6</dhp.jackson.version>
+<dhp.spark.version>3.4.1</dhp.spark.version>
+<dhp.jackson.version>2.14.2</dhp.jackson.version>
 <dhp.commons.lang.version>3.5</dhp.commons.lang.version>
 <dhp.site.skip>true</dhp.site.skip>
 <dhp.guava.version>11.0.2</dhp.guava.version>
-<scala.version>2.11.12</scala.version>
+<scala.version>2.12.18</scala.version>
+<scala.binary.version>2.12</scala.binary.version>
 <junit-jupiter.version>5.6.1</junit-jupiter.version>
 <mockito-core.version>3.3.3</mockito-core.version>
 <mongodb.driver.version>3.4.2</mongodb.driver.version>
 <vtd.version>[2.12,3.0)</vtd.version>
-<dhp-schemas.version>[3.17.1]</dhp-schemas.version>
+<dhp-schemas.version>3.17.2-SNAPSHOT</dhp-schemas.version>
 <dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
 <dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
 <dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
@@ -883,13 +884,13 @@
 <solr.version>7.5.0</solr.version>
 <okhttp.version>4.7.2</okhttp.version>
 <common.compress.version>1.20</common.compress.version>
-<json4s.version>3.5.3</json4s.version>
+<json4s.version>3.7.0-M11</json4s.version>
 <jsonschemagenerator.version>4.13.0</jsonschemagenerator.version>
 <common.csv.version>1.8</common.csv.version>
 <apache.poi.version>4.1.2</apache.poi.version>
 <common.text.version>1.8</common.text.version>
 <org.apache.httpcomponents.version>4.5.3</org.apache.httpcomponents.version>
-<net.alchim31.maven.version>4.0.1</net.alchim31.maven.version>
+<net.alchim31.maven.version>4.8.1</net.alchim31.maven.version>
 <google.gson.version>2.2.2</google.gson.version>
 <commons.logging.version>1.1.3</commons.logging.version>
 <commons.collections.version>3.2.1</commons.collections.version>