forked from D-Net/dnet-hadoop

code formatting

commit 0656ab2838
parent ab7f0855af
AuthorsMatch.java
@@ -1,19 +1,20 @@
 package eu.dnetlib.pace.tree;
 
-import com.wcohen.ss.AbstractStringDistance;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.Person;
-import eu.dnetlib.pace.tree.support.AbstractListComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-import eu.dnetlib.pace.util.AuthorMatchers;
-
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.function.BiFunction;
 import java.util.stream.Collectors;
 
+import com.wcohen.ss.AbstractStringDistance;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.model.Person;
+import eu.dnetlib.pace.tree.support.AbstractListComparator;
+import eu.dnetlib.pace.tree.support.ComparatorClass;
+import eu.dnetlib.pace.util.AuthorMatchers;
+
 @ComparatorClass("authorsMatch")
 public class AuthorsMatch extends AbstractListComparator {
 
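Note: the reshuffle above is the import ordering this commit applies consistently across the Java files it touches: java.* imports first, then third-party packages (here com.wcohen.ss), then eu.dnetlib packages, with a blank line between each group.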
UtilTest.java
@@ -12,7 +12,6 @@ import org.junit.jupiter.api.Test;
 
 import eu.dnetlib.pace.model.Person;
 
-
 public class UtilTest {
 
 	static Map<String, String> params;
MagUtility.scala
@@ -5,7 +5,17 @@ import eu.dnetlib.dhp.schema.action.AtomicAction
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
 import eu.dnetlib.dhp.schema.oaf.utils.{OafMapperUtils, PidType}
-import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, Journal, Organization, Publication, Relation, Result, Dataset => OafDataset}
+import eu.dnetlib.dhp.schema.oaf.{
+  Author,
+  DataInfo,
+  Instance,
+  Journal,
+  Organization,
+  Publication,
+  Relation,
+  Result,
+  Dataset => OafDataset
+}
 import eu.dnetlib.dhp.utils.DHPUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.{Dataset, Row, SparkSession}
MagUtility.scala
@@ -688,33 +698,45 @@ object MagUtility extends Serializable {
     o.setLegalname(field(r.getAs[String]("DisplayName"), null))
     val gid = r.getAs[String]("GridId")
     if (gid != null) {
-      o.setPid(List(
-        structuredProperty(gid, qualifier(
-          PidType.GRID.toString,
-          PidType.GRID.toString,
-          ModelConstants.DNET_PID_TYPES,
-          ModelConstants.DNET_PID_TYPES
-        ),
-          null),
-        structuredProperty(r.getAs[Long]("AffiliationId").toString, qualifier(
-          PidType.mag_id.toString,
-          PidType.mag_id.toString,
-          ModelConstants.DNET_PID_TYPES,
-          ModelConstants.DNET_PID_TYPES
-        ),
-          null)
-      ).asJava)
+      o.setPid(
+        List(
+          structuredProperty(
+            gid,
+            qualifier(
+              PidType.GRID.toString,
+              PidType.GRID.toString,
+              ModelConstants.DNET_PID_TYPES,
+              ModelConstants.DNET_PID_TYPES
+            ),
+            null
+          ),
+          structuredProperty(
+            r.getAs[Long]("AffiliationId").toString,
+            qualifier(
+              PidType.mag_id.toString,
+              PidType.mag_id.toString,
+              ModelConstants.DNET_PID_TYPES,
+              ModelConstants.DNET_PID_TYPES
+            ),
+            null
+          )
+        ).asJava
+      )
     } else {
-      o.setPid(List(
-        structuredProperty(r.getAs[Long]("AffiliationId").toString, qualifier(
-          PidType.mag_id.toString,
-          PidType.mag_id.toString,
-          ModelConstants.DNET_PID_TYPES,
-          ModelConstants.DNET_PID_TYPES
-        ),
-          null)
-      ).asJava)
+      o.setPid(
+        List(
+          structuredProperty(
+            r.getAs[Long]("AffiliationId").toString,
+            qualifier(
+              PidType.mag_id.toString,
+              PidType.mag_id.toString,
+              ModelConstants.DNET_PID_TYPES,
+              ModelConstants.DNET_PID_TYPES
+            ),
+            null
+          )
+        ).asJava
+      )
     }
     val c = r.getAs[String]("Iso3166Code")
     if (c != null)
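Note: the two branches above differ only in whether a GRID pid accompanies the mag_id pid; the commit reindents them without changing behavior. As a reading aid, a condensed sketch of the same logic follows. It reuses r, gid and o from the surrounding method and the file's own helpers (structuredProperty, qualifier); the magPid helper is hypothetical, introduced here only for illustration.

  import scala.collection.JavaConverters._

  // hypothetical helper (not in the commit): a pid whose classid and classname coincide
  def magPid(value: String, pidType: String) =
    structuredProperty(
      value,
      qualifier(pidType, pidType, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES),
      null
    )

  // mag_id is always present; GRID is added only when GridId is non-null
  val magIdPid = magPid(r.getAs[Long]("AffiliationId").toString, PidType.mag_id.toString)
  o.setPid(
    (if (gid != null) List(magPid(gid, PidType.GRID.toString), magIdPid)
     else List(magIdPid)).asJava
  )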
SparkMagOrganizationAS.scala
@@ -6,33 +6,37 @@ import eu.dnetlib.dhp.schema.oaf.Organization
 import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
 
-class SparkMagOrganizationAS (propertyPath: String, args: Array[String], log: Logger)
+class SparkMagOrganizationAS(propertyPath: String, args: Array[String], log: Logger)
     extends AbstractScalaApplication(propertyPath, args, log: Logger) {
 
   /** Here all the spark applications runs this method
    * where the whole logic of the spark node is defined
    */
   override def run(): Unit = {
-    val magBasePath:String = parser.get("magBasePath")
+    val magBasePath: String = parser.get("magBasePath")
     log.info(s"magBasePath is $magBasePath")
-    val outputPath:String = parser.get("outputPath")
+    val outputPath: String = parser.get("outputPath")
     log.info(s"outputPath is $outputPath")
-    generateAS(spark,magBasePath, outputPath)
+    generateAS(spark, magBasePath, outputPath)
 
   }
 
-  def generateAS(spark:SparkSession, magBasePath:String,outputPath:String ):Unit = {
+  def generateAS(spark: SparkSession, magBasePath: String, outputPath: String): Unit = {
     import spark.implicits._
-    val organizations = MagUtility.loadMagEntity(spark,"Affiliations", magBasePath)
-    organizations.map(r => MagUtility.generateOrganization(r)).write.mode(SaveMode.Overwrite)
+    val organizations = MagUtility.loadMagEntity(spark, "Affiliations", magBasePath)
+    organizations
+      .map(r => MagUtility.generateOrganization(r))
+      .write
+      .mode(SaveMode.Overwrite)
       .option("compression", "gzip")
       .text(outputPath)
   }
 }
 
-object SparkMagOrganizationAS{
+object SparkMagOrganizationAS {
 
   val log: Logger = LoggerFactory.getLogger(SparkMagOrganizationAS.getClass)
 
   def main(args: Array[String]): Unit = {
     new SparkMagOrganizationAS("/eu/dnetlib/dhp/collection/mag/create_organization_AS.json", args, log)
       .initialize()
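For context, generateAS can be exercised without the full workflow since it is a public method taking the session and paths directly. A minimal local driver might look like the sketch below; the local[*] master and the two /tmp paths are placeholders, and it assumes a MAG "Affiliations" dump is available under the given magBasePath:

  import org.apache.spark.sql.SparkSession

  // build a local session and call generateAS directly (placeholder paths)
  val spark = SparkSession.builder().master("local[*]").appName("mag-organization-as").getOrCreate()
  val app = new SparkMagOrganizationAS("/eu/dnetlib/dhp/collection/mag/create_organization_AS.json", Array.empty[String], SparkMagOrganizationAS.log)
  app.generateAS(spark, "/tmp/mag", "/tmp/mag_organization_as")
  spark.stop()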
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.collection.plugin.rest;
 
 import java.util.HashMap;
MAGMappingTest.scala
@@ -10,7 +10,6 @@ class MAGMappingTest {
 
   val mapper = new ObjectMapper()
 
-
   def mappingTest(): Unit = {
 
     val spark = SparkSession
SparkDedupTest.java
@@ -258,7 +258,6 @@ public class SparkDedupTest implements Serializable {
 		assertEquals(115, sw_simrel.count());
 	}
 
-
 	// check if the first relation in the whitelist exists
 	assertTrue(
 		sw_simrel
OrcidClientTest.java
@@ -31,7 +31,6 @@ import eu.dnetlib.dhp.schema.orcid.AuthorData;
 import eu.dnetlib.doiboost.orcid.util.DownloadsReport;
 import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector;
 
-
 public class OrcidClientTest {
 	final int REQ_LIMIT = 24;
 	final int REQ_MAX_TEST = 100;
GenerateEntitiesApplication.java
@@ -130,12 +130,13 @@ public class GenerateEntitiesApplication extends AbstractMigrationApplication {
 		switch (mode) {
 			case claim:
 				save(
-					inputRdd.keyBy(oaf -> ModelSupport.idFn().apply(oaf))
+					inputRdd
+						.keyBy(oaf -> ModelSupport.idFn().apply(oaf))
 						.groupByKey()
 						.map(t -> MergeUtils.mergeGroup(t._1, t._2.iterator())),
-					//.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
-					//.reduceByKey(MergeUtils::merge)
-					//.map(Tuple2::_2),
+					// .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
+					// .reduceByKey(MergeUtils::merge)
+					// .map(Tuple2::_2),
 					targetPath);
 				break;
 			case graph:
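Note the commented-out alternative the hunk keeps in place: reduceByKey would fold records pairwise, while the active keyBy, groupByKey, mergeGroup path hands each identifier's whole group to MergeUtils.mergeGroup in a single call. A generic Scala sketch of that shape, where idOf and mergeAll are hypothetical stand-ins for ModelSupport.idFn() and MergeUtils.mergeGroup:

  import org.apache.spark.rdd.RDD
  import scala.reflect.ClassTag

  object MergeSketch {
    // key each record by its identifier, then merge every group in one call
    def mergeById[T: ClassTag](rdd: RDD[T])(idOf: T => String)(mergeAll: (String, Iterator[T]) => T): RDD[T] =
      rdd
        .keyBy(idOf)
        .groupByKey()
        .map { case (id, group) => mergeAll(id, group.iterator) }
  }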