code formatting

This commit is contained in:
Claudio Atzori 2020-10-30 15:47:05 +01:00
parent 04ad8969b2
commit 385214eeae
4 changed files with 49 additions and 39 deletions

View File

@@ -1,13 +1,9 @@
package eu.dnetlib.dhp.oa.dedup; package eu.dnetlib.dhp.oa.dedup;
import com.google.common.collect.Lists; import java.io.IOException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import java.util.*;
import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
@@ -17,12 +13,19 @@ import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
import scala.Tuple3; import scala.Tuple3;
import java.io.IOException;
import java.util.*;
public class SparkPrepareOrgRels extends AbstractSparkAction { public class SparkPrepareOrgRels extends AbstractSparkAction {
private static final Logger log = LoggerFactory.getLogger(SparkCreateDedupRecord.class); private static final Logger log = LoggerFactory.getLogger(SparkCreateDedupRecord.class);
@@ -235,14 +238,14 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
.joinWith(entities, relations.col("_2").equalTo(entities.col("_1")), "inner") .joinWith(entities, relations.col("_2").equalTo(entities.col("_1")), "inner")
.map( .map(
(MapFunction<Tuple2<Tuple2<String, String>, Tuple2<String, Organization>>, OrgSimRel>) r -> new OrgSimRel( (MapFunction<Tuple2<Tuple2<String, String>, Tuple2<String, Organization>>, OrgSimRel>) r -> new OrgSimRel(
r._1()._1(), r._1()._1(),
r._2()._2().getOriginalId().get(0), r._2()._2().getOriginalId().get(0),
r._2()._2().getLegalname() != null ? r._2()._2().getLegalname().getValue() : "", r._2()._2().getLegalname() != null ? r._2()._2().getLegalname().getValue() : "",
r._2()._2().getLegalshortname() != null ? r._2()._2().getLegalshortname().getValue() : "", r._2()._2().getLegalshortname() != null ? r._2()._2().getLegalshortname().getValue() : "",
r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "", r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "",
r._2()._2().getWebsiteurl() != null ? r._2()._2().getWebsiteurl().getValue() : "", r._2()._2().getWebsiteurl() != null ? r._2()._2().getWebsiteurl().getValue() : "",
r._2()._2().getCollectedfrom().get(0).getValue(), r._2()._2().getCollectedfrom().get(0).getValue(),
"group::" + r._1()._1()), "group::" + r._1()._1()),
Encoders.bean(OrgSimRel.class)) Encoders.bean(OrgSimRel.class))
.map( .map(
(MapFunction<OrgSimRel, Tuple2<String, OrgSimRel>>) o -> new Tuple2<>(o.getLocal_id(), o), (MapFunction<OrgSimRel, Tuple2<String, OrgSimRel>>) o -> new Tuple2<>(o.getLocal_id(), o),

View File

@@ -8,6 +8,7 @@ import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.dhp.oa.dedup.IdGenerator; import eu.dnetlib.dhp.oa.dedup.IdGenerator;
import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.KeyValue;

View File

@@ -94,7 +94,8 @@ public class IdGeneratorTest {
public void generateIdTest1() { public void generateIdTest1() {
String id1 = IdGenerator.generate(bestIds, "50|defaultID"); String id1 = IdGenerator.generate(bestIds, "50|defaultID");
System.out.println("id list 1 = " + bestIds.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList())); System.out
.println("id list 1 = " + bestIds.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
assertEquals("50|dedup_wf_001::9c5cfbf993d38476e0f959a301239719", id1); assertEquals("50|dedup_wf_001::9c5cfbf993d38476e0f959a301239719", id1);
} }
@@ -104,9 +105,11 @@ public class IdGeneratorTest {
String id1 = IdGenerator.generate(bestIds2, "50|defaultID"); String id1 = IdGenerator.generate(bestIds2, "50|defaultID");
String id2 = IdGenerator.generate(bestIds3, "50|defaultID"); String id2 = IdGenerator.generate(bestIds3, "50|defaultID");
System.out.println("id list 2 = " + bestIds2.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList())); System.out
.println("id list 2 = " + bestIds2.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
System.out.println("winner 2 = " + id1); System.out.println("winner 2 = " + id1);
System.out.println("id list 3 = " + bestIds3.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList())); System.out
.println("id list 3 = " + bestIds3.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
System.out.println("winner 3 = " + id2); System.out.println("winner 3 = " + id2);
assertEquals("50|dedup_wf_001::2c56cc1914bffdb30fdff354e0099612", id1); assertEquals("50|dedup_wf_001::2c56cc1914bffdb30fdff354e0099612", id1);

View File

@@ -1,12 +1,18 @@
package eu.dnetlib.dhp.oa.dedup; package eu.dnetlib.dhp.oa.dedup;
import com.fasterxml.jackson.databind.ObjectMapper; import static java.nio.file.Files.createTempDirectory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Relation; import static org.apache.spark.sql.functions.count;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import static org.junit.jupiter.api.Assertions.assertEquals;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import static org.mockito.Mockito.lenient;
import eu.dnetlib.pace.util.MapDocumentUtil;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@@ -25,19 +31,16 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.Mockito; import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2; import scala.Tuple2;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import static java.nio.file.Files.createTempDirectory;
import static org.apache.spark.sql.functions.count;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.lenient;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
@TestMethodOrder(MethodOrderer.OrderAnnotation.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class SparkDedupTest implements Serializable { public class SparkDedupTest implements Serializable {