1
0
Fork 0

code formatting

This commit is contained in:
Claudio Atzori 2024-04-20 08:10:58 +02:00
parent ab7f0855af
commit 0656ab2838
9 changed files with 80 additions and 55 deletions

View File

@ -1,19 +1,20 @@
package eu.dnetlib.pace.tree; package eu.dnetlib.pace.tree;
import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Person;
import eu.dnetlib.pace.tree.support.AbstractListComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
import eu.dnetlib.pace.util.AuthorMatchers;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.function.BiFunction; import java.util.function.BiFunction;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Person;
import eu.dnetlib.pace.tree.support.AbstractListComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
import eu.dnetlib.pace.util.AuthorMatchers;
@ComparatorClass("authorsMatch") @ComparatorClass("authorsMatch")
public class AuthorsMatch extends AbstractListComparator { public class AuthorsMatch extends AbstractListComparator {

View File

@ -12,7 +12,6 @@ import org.junit.jupiter.api.Test;
import eu.dnetlib.pace.model.Person; import eu.dnetlib.pace.model.Person;
public class UtilTest { public class UtilTest {
static Map<String, String> params; static Map<String, String> params;

View File

@ -5,7 +5,17 @@ import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
import eu.dnetlib.dhp.schema.oaf.utils.{OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf.utils.{OafMapperUtils, PidType}
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, Journal, Organization, Publication, Relation, Result, Dataset => OafDataset} import eu.dnetlib.dhp.schema.oaf.{
Author,
DataInfo,
Instance,
Journal,
Organization,
Publication,
Relation,
Result,
Dataset => OafDataset
}
import eu.dnetlib.dhp.utils.DHPUtils import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.spark.sql.types._ import org.apache.spark.sql.types._
import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.{Dataset, Row, SparkSession}
@ -688,33 +698,45 @@ object MagUtility extends Serializable {
o.setLegalname(field(r.getAs[String]("DisplayName"), null)) o.setLegalname(field(r.getAs[String]("DisplayName"), null))
val gid = r.getAs[String]("GridId") val gid = r.getAs[String]("GridId")
if (gid != null) { if (gid != null) {
o.setPid(List( o.setPid(
structuredProperty(gid, qualifier( List(
structuredProperty(
gid,
qualifier(
PidType.GRID.toString, PidType.GRID.toString,
PidType.GRID.toString, PidType.GRID.toString,
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES,
ModelConstants.DNET_PID_TYPES ModelConstants.DNET_PID_TYPES
), ),
null), null
structuredProperty(r.getAs[Long]("AffiliationId").toString, qualifier( ),
structuredProperty(
r.getAs[Long]("AffiliationId").toString,
qualifier(
PidType.mag_id.toString, PidType.mag_id.toString,
PidType.mag_id.toString, PidType.mag_id.toString,
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES,
ModelConstants.DNET_PID_TYPES ModelConstants.DNET_PID_TYPES
), ),
null) null
)
).asJava) ).asJava
)
} else { } else {
o.setPid(List( o.setPid(
structuredProperty(r.getAs[Long]("AffiliationId").toString, qualifier( List(
structuredProperty(
r.getAs[Long]("AffiliationId").toString,
qualifier(
PidType.mag_id.toString, PidType.mag_id.toString,
PidType.mag_id.toString, PidType.mag_id.toString,
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES,
ModelConstants.DNET_PID_TYPES ModelConstants.DNET_PID_TYPES
), ),
null) null
).asJava) )
).asJava
)
} }
val c = r.getAs[String]("Iso3166Code") val c = r.getAs[String]("Iso3166Code")
if (c != null) if (c != null)

View File

@ -6,33 +6,37 @@ import eu.dnetlib.dhp.schema.oaf.Organization
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory} import org.slf4j.{Logger, LoggerFactory}
class SparkMagOrganizationAS (propertyPath: String, args: Array[String], log: Logger) class SparkMagOrganizationAS(propertyPath: String, args: Array[String], log: Logger)
extends AbstractScalaApplication(propertyPath, args, log: Logger) { extends AbstractScalaApplication(propertyPath, args, log: Logger) {
/** Here all the spark applications runs this method /** Here all the spark applications runs this method
* where the whole logic of the spark node is defined * where the whole logic of the spark node is defined
*/ */
override def run(): Unit = { override def run(): Unit = {
val magBasePath:String = parser.get("magBasePath") val magBasePath: String = parser.get("magBasePath")
log.info(s"magBasePath is $magBasePath") log.info(s"magBasePath is $magBasePath")
val outputPath:String = parser.get("outputPath") val outputPath: String = parser.get("outputPath")
log.info(s"outputPath is $outputPath") log.info(s"outputPath is $outputPath")
generateAS(spark,magBasePath, outputPath) generateAS(spark, magBasePath, outputPath)
} }
def generateAS(spark:SparkSession, magBasePath:String,outputPath:String ):Unit = { def generateAS(spark: SparkSession, magBasePath: String, outputPath: String): Unit = {
import spark.implicits._ import spark.implicits._
val organizations = MagUtility.loadMagEntity(spark,"Affiliations", magBasePath) val organizations = MagUtility.loadMagEntity(spark, "Affiliations", magBasePath)
organizations.map(r => MagUtility.generateOrganization(r)).write.mode(SaveMode.Overwrite) organizations
.map(r => MagUtility.generateOrganization(r))
.write
.mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.text(outputPath) .text(outputPath)
} }
} }
object SparkMagOrganizationAS{ object SparkMagOrganizationAS {
val log: Logger = LoggerFactory.getLogger(SparkMagOrganizationAS.getClass) val log: Logger = LoggerFactory.getLogger(SparkMagOrganizationAS.getClass)
def main(args: Array[String]): Unit = { def main(args: Array[String]): Unit = {
new SparkMagOrganizationAS("/eu/dnetlib/dhp/collection/mag/create_organization_AS.json", args, log) new SparkMagOrganizationAS("/eu/dnetlib/dhp/collection/mag/create_organization_AS.json", args, log)
.initialize() .initialize()

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.collection.plugin.rest; package eu.dnetlib.dhp.collection.plugin.rest;
import java.util.HashMap; import java.util.HashMap;

View File

@ -10,7 +10,6 @@ class MAGMappingTest {
val mapper = new ObjectMapper() val mapper = new ObjectMapper()
def mappingTest(): Unit = { def mappingTest(): Unit = {
val spark = SparkSession val spark = SparkSession

View File

@ -258,7 +258,6 @@ public class SparkDedupTest implements Serializable {
assertEquals(115, sw_simrel.count()); assertEquals(115, sw_simrel.count());
} }
// check if the first relation in the whitelist exists // check if the first relation in the whitelist exists
assertTrue( assertTrue(
sw_simrel sw_simrel

View File

@ -31,7 +31,6 @@ import eu.dnetlib.dhp.schema.orcid.AuthorData;
import eu.dnetlib.doiboost.orcid.util.DownloadsReport; import eu.dnetlib.doiboost.orcid.util.DownloadsReport;
import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector;
public class OrcidClientTest { public class OrcidClientTest {
final int REQ_LIMIT = 24; final int REQ_LIMIT = 24;
final int REQ_MAX_TEST = 100; final int REQ_MAX_TEST = 100;

View File

@ -130,12 +130,13 @@ public class GenerateEntitiesApplication extends AbstractMigrationApplication {
switch (mode) { switch (mode) {
case claim: case claim:
save( save(
inputRdd.keyBy(oaf -> ModelSupport.idFn().apply(oaf)) inputRdd
.keyBy(oaf -> ModelSupport.idFn().apply(oaf))
.groupByKey() .groupByKey()
.map(t -> MergeUtils.mergeGroup(t._1, t._2.iterator())), .map(t -> MergeUtils.mergeGroup(t._1, t._2.iterator())),
//.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) // .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
//.reduceByKey(MergeUtils::merge) // .reduceByKey(MergeUtils::merge)
//.map(Tuple2::_2), // .map(Tuple2::_2),
targetPath); targetPath);
break; break;
case graph: case graph: