forked from D-Net/dnet-hadoop
Build with spark 3.4 (dedup and dependencies only tested)
This commit is contained in:
parent
861c368e65
commit
d80f12da06
|
@ -52,6 +52,8 @@
|
|||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
|
||||
<scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
@ -76,11 +78,11 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
@ -159,7 +161,7 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-schemas</artifactId>
|
||||
<artifactId>dhp-schemas_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<version>${net.alchim31.maven.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
|
@ -39,8 +39,9 @@
|
|||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
|
||||
<scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
<addScalacArgs>-target:jvm-1.8</addScalacArgs>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
@ -98,11 +99,11 @@
|
|||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
|
|
@ -11,12 +11,12 @@
|
|||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
|
|
@ -38,6 +38,8 @@
|
|||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
|
||||
<scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
@ -54,11 +56,11 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
|
|
@ -16,11 +16,11 @@
|
|||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.elasticsearch</groupId>
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<version>${net.alchim31.maven.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
|
@ -32,8 +32,9 @@
|
|||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
|
||||
<scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
<addScalacArgs>-target:jvm-1.8</addScalacArgs>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
@ -58,38 +59,67 @@
|
|||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>log4j</artifactId>
|
||||
<groupId>log4j</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>annotations</artifactId>
|
||||
<groupId>org.jetbrains</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<groupId>org.slf4j</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-pace-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>jsr305</artifactId>
|
||||
<groupId>com.google.code.findbugs</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>javassist</artifactId>
|
||||
<groupId>org.javassist</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-lang3</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.scala-lang.modules</groupId>
|
||||
<artifactId>scala-java8-compat_2.11</artifactId>
|
||||
<artifactId>scala-java8-compat_${scala.binary.version}</artifactId>
|
||||
<version>1.0.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.scala-lang.modules</groupId>
|
||||
<artifactId>scala-collection-compat_2.11</artifactId>
|
||||
<version>2.8.0</version>
|
||||
<artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
|
||||
<version>2.11.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-graphx_2.11</artifactId>
|
||||
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
@ -107,12 +137,6 @@
|
|||
<artifactId>jaxen</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.influxdb</groupId>
|
||||
<artifactId>influxdb-client-java</artifactId>
|
||||
<version>3.1.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
|
|
|
@ -0,0 +1,118 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.dedup;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.application.dedup.log.DedupLogModel;
|
||||
import eu.dnetlib.dhp.application.dedup.log.DedupLogWriter;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.SparkDedupConfig;
|
||||
|
||||
public class SparkSimRelsAnalytics extends AbstractSparkAction {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkSimRelsAnalytics.class);
|
||||
|
||||
public SparkSimRelsAnalytics(ArgumentApplicationParser parser, SparkSession spark) {
|
||||
super(parser, spark);
|
||||
spark.sparkContext().setLogLevel("WARN");
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
SparkSimRelsAnalytics.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
new SparkSimRelsAnalytics(parser, getSparkSession(conf))
|
||||
.run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl")));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run(ISLookUpService isLookUpService)
|
||||
throws DocumentException, IOException, ISLookUpException, SAXException {
|
||||
|
||||
// read oozie parameters
|
||||
final String graphBasePath = parser.get("graphBasePath");
|
||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
||||
final String actionSetId = parser.get("actionSetId");
|
||||
final String workingPath = parser.get("workingPath");
|
||||
final int numPartitions = Optional
|
||||
.ofNullable(parser.get("numPartitions"))
|
||||
.map(Integer::valueOf)
|
||||
.orElse(NUM_PARTITIONS);
|
||||
|
||||
log.info("numPartitions: '{}'", numPartitions);
|
||||
log.info("graphBasePath: '{}'", graphBasePath);
|
||||
log.info("isLookUpUrl: '{}'", isLookUpUrl);
|
||||
log.info("actionSetId: '{}'", actionSetId);
|
||||
log.info("workingPath: '{}'", workingPath);
|
||||
|
||||
final String dfLogPath = parser.get("dataframeLog");
|
||||
final String runTag = Optional.ofNullable(parser.get("runTAG")).orElse("UNKNOWN");
|
||||
|
||||
// for each dedup configuration
|
||||
for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) {
|
||||
final long start = System.currentTimeMillis();
|
||||
|
||||
final String entity = dedupConf.getWf().getEntityType();
|
||||
final String subEntity = dedupConf.getWf().getSubEntityValue();
|
||||
log.info("Creating simrels for: '{}'", subEntity);
|
||||
|
||||
final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity);
|
||||
removeOutputDir(spark, outputPath);
|
||||
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
SparkDedupConfig sparkConfig = new SparkDedupConfig(dedupConf, numPartitions);
|
||||
|
||||
spark.udf().register("collect_sort_slice", sparkConfig.collectSortSliceUDAF());
|
||||
|
||||
Dataset<?> simRels = spark
|
||||
.read()
|
||||
.textFile(DedupUtility.createEntityPath(graphBasePath, subEntity))
|
||||
.transform(sparkConfig.modelExtractor()) // Extract fields from input json column according to model
|
||||
// definition
|
||||
.transform(sparkConfig.generateClustersWithWindows()) // generate <key,block> pairs according to
|
||||
// filters, clusters, and model
|
||||
// definition
|
||||
.transform(sparkConfig.processClusters()) // process blocks and emits <from,to> pairs of found
|
||||
// similarities
|
||||
.map(
|
||||
(MapFunction<Row, Relation>) t -> DedupUtility
|
||||
.createSimRel(t.getStruct(0).getString(0), t.getStruct(0).getString(1), entity),
|
||||
Encoders.bean(Relation.class));
|
||||
|
||||
saveParquet(simRels, outputPath, SaveMode.Overwrite);
|
||||
final long end = System.currentTimeMillis();
|
||||
if (StringUtils.isNotBlank(dfLogPath)) {
|
||||
final DedupLogModel model = new DedupLogModel(runTag, dedupConf.toString(), subEntity, start, end,
|
||||
end - start);
|
||||
new DedupLogWriter(dfLogPath).appendLog(model, spark);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -13,6 +13,6 @@
|
|||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
<value>spark342</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -126,15 +126,25 @@
|
|||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=15000
|
||||
--conf spark.sql.shuffle.partitions=5000
|
||||
--conf spark.driver.extraJavaOptions="-Xss256k"
|
||||
--conf spark.executor.extraJavaOptions="-Dlog4j.configuration=spark-log4j.properties -Xss256k"
|
||||
--conf spark.extraListeners=
|
||||
--conf spark.sql.queryExecutionListeners=
|
||||
--conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.minExecutors=100 --conf spark.dynamicAllocation.shuffleTracking.enabled=true
|
||||
--conf spark.network.io.preferDirectBufs=true --conf spark.memory.fraction=0.4 --conf spark.sql.adaptive.coalescePartitions.minPartitionNum=5000
|
||||
--conf spark.shuffle.useOldFetchProtocol=true --conf spark.shuffle.service.enabled=true --conf spark.eventLog.enabled=true
|
||||
--conf spark.executor.heartbeatInterval=60s
|
||||
--conf spark.network.timeout=640s
|
||||
--conf spark.sql.legacy.allowUntypedScalaUDF=true
|
||||
</spark-opts>
|
||||
<arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--numPartitions</arg><arg>15000</arg>
|
||||
<arg>--numPartitions</arg><arg>5000</arg>
|
||||
</spark>
|
||||
<ok to="WhitelistSimRels"/>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
|
|
@ -95,6 +95,7 @@ public class SparkDedupTest implements Serializable {
|
|||
|
||||
final SparkConf conf = new SparkConf();
|
||||
conf.set("spark.sql.shuffle.partitions", "200");
|
||||
conf.set("spark.sql.legacy.allowUntypedScalaUDF", "true");
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(SparkDedupTest.class.getSimpleName())
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
|
||||
<scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
@ -70,12 +72,12 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
|
|
@ -12,11 +12,11 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
@ -27,7 +27,7 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-hive_2.11</artifactId>
|
||||
<artifactId>spark-hive_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<version>${net.alchim31.maven.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
|
@ -37,6 +37,8 @@
|
|||
<arg>-Xmax-classfile-name</arg>
|
||||
<arg>200</arg>
|
||||
</args>
|
||||
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
|
||||
<scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
@ -64,15 +66,15 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-hive_2.11</artifactId>
|
||||
<artifactId>spark-hive_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
|
@ -125,7 +127,7 @@
|
|||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.json4s</groupId>
|
||||
<artifactId>json4s-jackson_2.11</artifactId>
|
||||
<artifactId>json4s-jackson_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<version>${net.alchim31.maven.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
|
@ -37,6 +37,8 @@
|
|||
<arg>-Xmax-classfile-name</arg>
|
||||
<arg>200</arg>
|
||||
</args>
|
||||
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
|
||||
<scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
@ -48,11 +50,11 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.jayway.jsonpath</groupId>
|
||||
|
|
|
@ -10,11 +10,11 @@
|
|||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
|
|
|
@ -10,11 +10,11 @@
|
|||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
|
|
|
@ -46,13 +46,11 @@
|
|||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<version>2.2.0</version>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<version>2.4.5</version>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.googlecode.json-simple</groupId>
|
||||
|
|
|
@ -46,13 +46,11 @@
|
|||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<version>2.2.0</version>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<version>2.4.5</version>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.googlecode.json-simple</groupId>
|
||||
|
|
31
pom.xml
31
pom.xml
|
@ -142,7 +142,7 @@
|
|||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-schemas</artifactId>
|
||||
<artifactId>dhp-schemas_${scala.binary.version}</artifactId>
|
||||
<version>${dhp-schemas.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
@ -171,25 +171,25 @@
|
|||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||
<version>${dhp.spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||
<version>${dhp.spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-graphx_2.11</artifactId>
|
||||
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
|
||||
<version>${dhp.spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-hive_2.11</artifactId>
|
||||
<artifactId>spark-hive_${scala.binary.version}</artifactId>
|
||||
<version>${dhp.spark.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
@ -295,7 +295,7 @@
|
|||
<dependency>
|
||||
<groupId>com.lucidworks.spark</groupId>
|
||||
<artifactId>spark-solr</artifactId>
|
||||
<version>3.6.0</version>
|
||||
<version>4.0.2</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>*</artifactId>
|
||||
|
@ -523,7 +523,7 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>org.json4s</groupId>
|
||||
<artifactId>json4s-jackson_2.11</artifactId>
|
||||
<artifactId>json4s-jackson_${scala.binary.version}</artifactId>
|
||||
<version>${json4s.version}</version>
|
||||
</dependency>
|
||||
|
||||
|
@ -699,7 +699,7 @@
|
|||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.antipathy</groupId>
|
||||
<artifactId>mvn-scalafmt_2.11</artifactId>
|
||||
<artifactId>mvn-scalafmt_${scala.binary.version}</artifactId>
|
||||
<version>1.0.1640073709.733712b</version>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
|
@ -756,7 +756,7 @@
|
|||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.antipathy</groupId>
|
||||
<artifactId>mvn-scalafmt_2.11</artifactId>
|
||||
<artifactId>mvn-scalafmt_${scala.binary.version}</artifactId>
|
||||
<configuration>
|
||||
<configLocation>https://code-repo.d4science.org/D-Net/dnet-hadoop/raw/branch/beta/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf</configLocation>
|
||||
<skipTestSources>false</skipTestSources>
|
||||
|
@ -865,17 +865,18 @@
|
|||
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
|
||||
<dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
|
||||
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>
|
||||
<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
|
||||
<dhp.jackson.version>2.9.6</dhp.jackson.version>
|
||||
<dhp.spark.version>3.4.1</dhp.spark.version>
|
||||
<dhp.jackson.version>2.14.2</dhp.jackson.version>
|
||||
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
|
||||
<dhp.site.skip>true</dhp.site.skip>
|
||||
<dhp.guava.version>11.0.2</dhp.guava.version>
|
||||
<scala.version>2.11.12</scala.version>
|
||||
<scala.version>2.12.18</scala.version>
|
||||
<scala.binary.version>2.12</scala.binary.version>
|
||||
<junit-jupiter.version>5.6.1</junit-jupiter.version>
|
||||
<mockito-core.version>3.3.3</mockito-core.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
<vtd.version>[2.12,3.0)</vtd.version>
|
||||
<dhp-schemas.version>[3.17.1]</dhp-schemas.version>
|
||||
<dhp-schemas.version>3.17.2-SNAPSHOT</dhp-schemas.version>
|
||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||
|
@ -883,13 +884,13 @@
|
|||
<solr.version>7.5.0</solr.version>
|
||||
<okhttp.version>4.7.2</okhttp.version>
|
||||
<common.compress.version>1.20</common.compress.version>
|
||||
<json4s.version>3.5.3</json4s.version>
|
||||
<json4s.version>3.7.0-M11</json4s.version>
|
||||
<jsonschemagenerator.version>4.13.0</jsonschemagenerator.version>
|
||||
<common.csv.version>1.8</common.csv.version>
|
||||
<apache.poi.version>4.1.2</apache.poi.version>
|
||||
<common.text.version>1.8</common.text.version>
|
||||
<org.apache.httpcomponents.version>4.5.3</org.apache.httpcomponents.version>
|
||||
<net.alchim31.maven.version>4.0.1</net.alchim31.maven.version>
|
||||
<net.alchim31.maven.version>4.8.1</net.alchim31.maven.version>
|
||||
<google.gson.version>2.2.2</google.gson.version>
|
||||
<commons.logging.version>1.1.3</commons.logging.version>
|
||||
<commons.collections.version>3.2.1</commons.collections.version>
|
||||
|
|
Loading…
Reference in New Issue