diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java
index 0921d7a64..07080b09e 100644
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java
@@ -1,19 +1,20 @@
 package eu.dnetlib.pace.tree;
 
-import com.wcohen.ss.AbstractStringDistance;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.Person;
-import eu.dnetlib.pace.tree.support.AbstractListComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-import eu.dnetlib.pace.util.AuthorMatchers;
-
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.function.BiFunction;
 import java.util.stream.Collectors;
 
+import com.wcohen.ss.AbstractStringDistance;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.model.Person;
+import eu.dnetlib.pace.tree.support.AbstractListComparator;
+import eu.dnetlib.pace.tree.support.ComparatorClass;
+import eu.dnetlib.pace.util.AuthorMatchers;
+
 @ComparatorClass("authorsMatch")
 public class AuthorsMatch extends AbstractListComparator {
diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
index 6d1300eae..93db552c1 100644
--- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
+++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
@@ -12,7 +12,6 @@ import org.junit.jupiter.api.Test;
 
 import eu.dnetlib.pace.model.Person;
 
-
 public class UtilTest {
 
     static Map params;
diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/MagUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/MagUtility.scala
index 48cb3276a..df22a6b84 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/MagUtility.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/MagUtility.scala
@@ -5,7 +5,17 @@ import eu.dnetlib.dhp.schema.action.AtomicAction
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
 import eu.dnetlib.dhp.schema.oaf.utils.{OafMapperUtils, PidType}
-import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, Journal, Organization, Publication, Relation, Result, Dataset => OafDataset}
+import eu.dnetlib.dhp.schema.oaf.{
+  Author,
+  DataInfo,
+  Instance,
+  Journal,
+  Organization,
+  Publication,
+  Relation,
+  Result,
+  Dataset => OafDataset
+}
 import eu.dnetlib.dhp.utils.DHPUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.{Dataset, Row, SparkSession}
@@ -688,33 +698,45 @@ object MagUtility extends Serializable {
     o.setLegalname(field(r.getAs[String]("DisplayName"), null))
     val gid = r.getAs[String]("GridId")
     if (gid != null) {
-      o.setPid(List(
-        structuredProperty(gid, qualifier(
-          PidType.GRID.toString,
-          PidType.GRID.toString,
-          ModelConstants.DNET_PID_TYPES,
-          ModelConstants.DNET_PID_TYPES
-        ),
-          null),
-        structuredProperty(r.getAs[Long]("AffiliationId").toString, qualifier(
-          PidType.mag_id.toString,
-          PidType.mag_id.toString,
-          ModelConstants.DNET_PID_TYPES,
-          ModelConstants.DNET_PID_TYPES
-        ),
-          null)
-
-      ).asJava)
+      o.setPid(
+        List(
+          structuredProperty(
+            gid,
+            qualifier(
+              PidType.GRID.toString,
+              PidType.GRID.toString,
+              ModelConstants.DNET_PID_TYPES,
+              ModelConstants.DNET_PID_TYPES
+            ),
+            null
+          ),
+          structuredProperty(
+            r.getAs[Long]("AffiliationId").toString,
+            qualifier(
+              PidType.mag_id.toString,
+              PidType.mag_id.toString,
+              ModelConstants.DNET_PID_TYPES,
+              ModelConstants.DNET_PID_TYPES
+            ),
+            null
+          )
+        ).asJava
+      )
     } else {
-      o.setPid(List(
-        structuredProperty(r.getAs[Long]("AffiliationId").toString, qualifier(
-          PidType.mag_id.toString,
-          PidType.mag_id.toString,
-          ModelConstants.DNET_PID_TYPES,
-          ModelConstants.DNET_PID_TYPES
-        ),
-          null)
-      ).asJava)
+      o.setPid(
+        List(
+          structuredProperty(
+            r.getAs[Long]("AffiliationId").toString,
+            qualifier(
+              PidType.mag_id.toString,
+              PidType.mag_id.toString,
+              ModelConstants.DNET_PID_TYPES,
+              ModelConstants.DNET_PID_TYPES
+            ),
+            null
+          )
+        ).asJava
+      )
     }
     val c = r.getAs[String]("Iso3166Code")
     if (c != null)
diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/SparkMagOrganizationAS.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/SparkMagOrganizationAS.scala
index 096a03f45..a9b0fac03 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/SparkMagOrganizationAS.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/SparkMagOrganizationAS.scala
@@ -6,33 +6,37 @@ import eu.dnetlib.dhp.schema.oaf.Organization
 import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
 
-class SparkMagOrganizationAS (propertyPath: String, args: Array[String], log: Logger)
-  extends AbstractScalaApplication(propertyPath, args, log: Logger) {
+class SparkMagOrganizationAS(propertyPath: String, args: Array[String], log: Logger)
+    extends AbstractScalaApplication(propertyPath, args, log: Logger) {
 
   /** Here all the spark applications runs this method
-   * where the whole logic of the spark node is defined
-   */
+    * where the whole logic of the spark node is defined
+    */
   override def run(): Unit = {
-    val magBasePath:String = parser.get("magBasePath")
+    val magBasePath: String = parser.get("magBasePath")
     log.info(s"magBasePath is $magBasePath")
-    val outputPath:String = parser.get("outputPath")
+    val outputPath: String = parser.get("outputPath")
     log.info(s"outputPath is $outputPath")
-    generateAS(spark,magBasePath, outputPath)
+    generateAS(spark, magBasePath, outputPath)
 
   }
 
-  def generateAS(spark:SparkSession, magBasePath:String,outputPath:String ):Unit = {
+  def generateAS(spark: SparkSession, magBasePath: String, outputPath: String): Unit = {
     import spark.implicits._
-    val organizations = MagUtility.loadMagEntity(spark,"Affiliations", magBasePath)
-    organizations.map(r => MagUtility.generateOrganization(r)).write.mode(SaveMode.Overwrite)
+    val organizations = MagUtility.loadMagEntity(spark, "Affiliations", magBasePath)
+    organizations
+      .map(r => MagUtility.generateOrganization(r))
+      .write
+      .mode(SaveMode.Overwrite)
       .option("compression", "gzip")
       .text(outputPath)
   }
 }
 
-object SparkMagOrganizationAS{
+object SparkMagOrganizationAS {
 
   val log: Logger = LoggerFactory.getLogger(SparkMagOrganizationAS.getClass)
+
   def main(args: Array[String]): Unit = {
     new SparkMagOrganizationAS("/eu/dnetlib/dhp/collection/mag/create_organization_AS.json", args, log)
       .initialize()
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java
index 2f0263a0d..bc2d12661 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.collection.plugin.rest;
 
 import java.util.HashMap;
diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala
index e41ccc41a..59b91d66b 100644
--- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala
@@ -10,7 +10,6 @@ class MAGMappingTest {
 
   val mapper = new ObjectMapper()
 
-
   def mappingTest(): Unit = {
 
     val spark = SparkSession
diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
index 2c96b7399..c80c98bb7 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
@@ -258,7 +258,6 @@ public class SparkDedupTest implements Serializable {
             assertEquals(115, sw_simrel.count());
         }
 
-
         // check if the first relation in the whitelist exists
         assertTrue(
             sw_simrel
diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java
index c4fd7dcba..bc912b124 100644
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java
@@ -31,7 +31,6 @@ import eu.dnetlib.dhp.schema.orcid.AuthorData;
 import eu.dnetlib.doiboost.orcid.util.DownloadsReport;
 import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector;
 
-
 public class OrcidClientTest {
     final int REQ_LIMIT = 24;
     final int REQ_MAX_TEST = 100;
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java
index 21d06692f..c3806c211 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java
@@ -130,12 +130,13 @@ public class GenerateEntitiesApplication extends AbstractMigrationApplication {
         switch (mode) {
             case claim:
                 save(
-                    inputRdd.keyBy(oaf -> ModelSupport.idFn().apply(oaf))
-                        .groupByKey()
-                        .map(t -> MergeUtils.mergeGroup(t._1, t._2.iterator())),
-                    //.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
-                    //.reduceByKey(MergeUtils::merge)
-                    //.map(Tuple2::_2),
+                    inputRdd
+                        .keyBy(oaf -> ModelSupport.idFn().apply(oaf))
+                        .groupByKey()
+                        .map(t -> MergeUtils.mergeGroup(t._1, t._2.iterator())),
+                    // .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
+                    // .reduceByKey(MergeUtils::merge)
+                    // .map(Tuple2::_2),
                     targetPath);
                 break;
             case graph: