forked from D-Net/dnet-hadoop
code formatting
This commit is contained in:
parent
04ad8969b2
commit
385214eeae
|
@ -1,13 +1,9 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.dedup;
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import java.io.IOException;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import java.util.*;
|
||||||
import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
@ -17,12 +13,19 @@ import org.apache.spark.sql.SaveMode;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
import scala.Tuple3;
|
import scala.Tuple3;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
public class SparkPrepareOrgRels extends AbstractSparkAction {
|
public class SparkPrepareOrgRels extends AbstractSparkAction {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkCreateDedupRecord.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkCreateDedupRecord.class);
|
||||||
|
@ -235,14 +238,14 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
|
||||||
.joinWith(entities, relations.col("_2").equalTo(entities.col("_1")), "inner")
|
.joinWith(entities, relations.col("_2").equalTo(entities.col("_1")), "inner")
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<Tuple2<Tuple2<String, String>, Tuple2<String, Organization>>, OrgSimRel>) r -> new OrgSimRel(
|
(MapFunction<Tuple2<Tuple2<String, String>, Tuple2<String, Organization>>, OrgSimRel>) r -> new OrgSimRel(
|
||||||
r._1()._1(),
|
r._1()._1(),
|
||||||
r._2()._2().getOriginalId().get(0),
|
r._2()._2().getOriginalId().get(0),
|
||||||
r._2()._2().getLegalname() != null ? r._2()._2().getLegalname().getValue() : "",
|
r._2()._2().getLegalname() != null ? r._2()._2().getLegalname().getValue() : "",
|
||||||
r._2()._2().getLegalshortname() != null ? r._2()._2().getLegalshortname().getValue() : "",
|
r._2()._2().getLegalshortname() != null ? r._2()._2().getLegalshortname().getValue() : "",
|
||||||
r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "",
|
r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "",
|
||||||
r._2()._2().getWebsiteurl() != null ? r._2()._2().getWebsiteurl().getValue() : "",
|
r._2()._2().getWebsiteurl() != null ? r._2()._2().getWebsiteurl().getValue() : "",
|
||||||
r._2()._2().getCollectedfrom().get(0).getValue(),
|
r._2()._2().getCollectedfrom().get(0).getValue(),
|
||||||
"group::" + r._1()._1()),
|
"group::" + r._1()._1()),
|
||||||
Encoders.bean(OrgSimRel.class))
|
Encoders.bean(OrgSimRel.class))
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<OrgSimRel, Tuple2<String, OrgSimRel>>) o -> new Tuple2<>(o.getLocal_id(), o),
|
(MapFunction<OrgSimRel, Tuple2<String, OrgSimRel>>) o -> new Tuple2<>(o.getLocal_id(), o),
|
||||||
|
|
|
@ -8,6 +8,7 @@ import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.dedup.IdGenerator;
|
import eu.dnetlib.dhp.oa.dedup.IdGenerator;
|
||||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
|
|
@ -94,7 +94,8 @@ public class IdGeneratorTest {
|
||||||
public void generateIdTest1() {
|
public void generateIdTest1() {
|
||||||
String id1 = IdGenerator.generate(bestIds, "50|defaultID");
|
String id1 = IdGenerator.generate(bestIds, "50|defaultID");
|
||||||
|
|
||||||
System.out.println("id list 1 = " + bestIds.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
|
System.out
|
||||||
|
.println("id list 1 = " + bestIds.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
|
||||||
|
|
||||||
assertEquals("50|dedup_wf_001::9c5cfbf993d38476e0f959a301239719", id1);
|
assertEquals("50|dedup_wf_001::9c5cfbf993d38476e0f959a301239719", id1);
|
||||||
}
|
}
|
||||||
|
@ -104,9 +105,11 @@ public class IdGeneratorTest {
|
||||||
String id1 = IdGenerator.generate(bestIds2, "50|defaultID");
|
String id1 = IdGenerator.generate(bestIds2, "50|defaultID");
|
||||||
String id2 = IdGenerator.generate(bestIds3, "50|defaultID");
|
String id2 = IdGenerator.generate(bestIds3, "50|defaultID");
|
||||||
|
|
||||||
System.out.println("id list 2 = " + bestIds2.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
|
System.out
|
||||||
|
.println("id list 2 = " + bestIds2.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
|
||||||
System.out.println("winner 2 = " + id1);
|
System.out.println("winner 2 = " + id1);
|
||||||
System.out.println("id list 3 = " + bestIds3.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
|
System.out
|
||||||
|
.println("id list 3 = " + bestIds3.stream().map(i -> i.getPid().getValue()).collect(Collectors.toList()));
|
||||||
System.out.println("winner 3 = " + id2);
|
System.out.println("winner 3 = " + id2);
|
||||||
|
|
||||||
assertEquals("50|dedup_wf_001::2c56cc1914bffdb30fdff354e0099612", id1);
|
assertEquals("50|dedup_wf_001::2c56cc1914bffdb30fdff354e0099612", id1);
|
||||||
|
|
|
@ -1,12 +1,18 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.dedup;
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import static java.nio.file.Files.createTempDirectory;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import static org.apache.spark.sql.functions.count;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import static org.mockito.Mockito.lenient;
|
||||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -25,19 +31,16 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.net.URISyntaxException;
|
|
||||||
import java.nio.file.Paths;
|
|
||||||
|
|
||||||
import static java.nio.file.Files.createTempDirectory;
|
|
||||||
import static org.apache.spark.sql.functions.count;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
import static org.mockito.Mockito.lenient;
|
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
||||||
public class SparkDedupTest implements Serializable {
|
public class SparkDedupTest implements Serializable {
|
||||||
|
|
Loading…
Reference in New Issue