diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index fac9a7565..fbf586f8c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -38,7 +38,7 @@ public class PacePerson { PacePerson.class .getResourceAsStream( "/eu/dnetlib/dhp/common/name_particles.txt"))); - } catch (IOException e) { + } catch (Exception e) { throw new RuntimeException(e); } } diff --git a/dhp-pace-core/pom.xml b/dhp-pace-core/pom.xml index a6d2538f2..6449b7ec8 100644 --- a/dhp-pace-core/pom.xml +++ b/dhp-pace-core/pom.xml @@ -24,7 +24,7 @@ scala-compile-first - initialize + process-resources add-source compile @@ -95,4 +95,90 @@ + + + spark-24 + + true + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/spark-2 + + + + + + + + + + + spark-34 + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/spark-2 + + + + + + + + + + + spark-35 + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/spark-35 + + + + + + + + + + diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala index 95325ace0..d67860a3d 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala @@ -2,11 +2,10 @@ package eu.dnetlib.pace.model import com.jayway.jsonpath.{Configuration, JsonPath} import eu.dnetlib.pace.config.{DedupConfig, Type} -import eu.dnetlib.pace.util.MapDocumentUtil -import org.apache.spark.sql.catalyst.encoders.RowEncoder +import eu.dnetlib.pace.util.{MapDocumentUtil, SparkCompatUtils} +import org.apache.spark.sql.{Dataset, Row} import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} -import org.apache.spark.sql.{Dataset, Row} import java.util.regex.Pattern import scala.collection.JavaConverters._ @@ -48,8 +47,8 @@ case class SparkModel(conf: DedupConfig) { val orderingFieldPosition: Int = schema.fieldIndex(orderingFieldName) - val parseJsonDataset: (Dataset[String] => Dataset[Row]) = df => { - df.map(r => rowFromJson(r))(RowEncoder(schema)) + val parseJsonDataset: (Dataset[String] => Dataset[Row]) = df => { + df.map(r => rowFromJson(r))(SparkCompatUtils.encoderFor(schema)) } def rowFromJson(json: String): Row = { diff --git a/dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala b/dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala new file mode 100644 index 000000000..a426703d6 --- /dev/null +++ b/dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala @@ -0,0 +1,12 @@ +package eu.dnetlib.pace.util + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} +import org.apache.spark.sql.types.StructType + +object SparkCompatUtils { + + def encoderFor(schema: StructType): ExpressionEncoder[Row] = { + RowEncoder(schema) + } +} \ No newline at end of file diff --git a/dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala b/dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala new file mode 100644 index 000000000..cbc454ae2 --- /dev/null +++ b/dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala @@ -0,0 +1,12 @@ +package eu.dnetlib.pace.util + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.types.StructType + +object SparkCompatUtils { + + def encoderFor(schema: StructType): ExpressionEncoder[Row] = { + ExpressionEncoder(schema) + } +} diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 60c925227..4b4e6c1c4 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -162,6 +162,18 @@ antlr4-runtime org.antlr + + woodstox-core + com.fasterxml.woodstox + + + log4j + * + + + org.apache.logging.log4j + * + @@ -210,7 +222,7 @@ - scala-2.11 + spark-24 true @@ -240,7 +252,7 @@ - scala-2.12 + spark-34 @@ -266,6 +278,32 @@ + + spark-35 + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/sparksolr-4 + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 1480af2a6..d7d7a6ca9 100644 --- a/pom.xml +++ b/pom.xml @@ -233,6 +233,13 @@ provided + + org.slf4j + slf4j-log4j12 + ${org.slf4j.version} + provided + + org.slf4j jcl-over-slf4j @@ -240,6 +247,28 @@ provided + + org.apache.logging.log4j + log4j-slf4j2-impl + ${log4j.version} + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + + + + org.apache.logging.log4j + log4j-1.2-api + ${log4j.version} + + org.apache.commons commons-lang3 @@ -381,7 +410,7 @@ org.apache.zookeeper zookeeper - 3.4.11 + ${zookeeper.version} @@ -713,6 +742,7 @@ 3.0.0-M4 true + false @@ -945,6 +975,7 @@ [6.0.5] [3.1.6] 2.2.2 + 1.2.17 3.19.0-GA 3.5.3 4.13.0 @@ -960,12 +991,13 @@ 3.6.0 0.0.7 [2.12,3.0) + 3.4.6 - scala-2.12 + spark-34 2.12 2.12.18 @@ -991,11 +1023,13 @@ 3.4.2.openaire-SNAPSHOT 2.14.2 3.12.0 + 2.19.0 3.7.0-M11 3.25.0-GA 4.10.0 2.0.6 0.10.2 + 3.6.3 + 4.8.1 + + + 1.23.0 + 1.8 + 1.10.0 + 1.9.4 + 1.16.0 + 3.2.2 + 2.13.0 + 1.1.3 + 1.7 + + 14.0.1 + 8.11.0 + 4.0.4 + 3.5.1.openaire-SNAPSHOT + 2.15.2 + 3.12.0 + 2.20.0 + 3.7.0-M11 + 3.25.0-GA + 4.10.0 + 2.0.7 + 0.10.2 + 3.6.3 + + + + + + java11 - 17 + [11 @@ -1031,6 +1106,7 @@ --add-opens=java.base/sun.util.calendar=ALL-UNNAMED true + false