Minor fixes and optimizations: build graph nodes in a single RDD map, make Node and Edge Serializable, and clean up imports
This commit is contained in:
parent
52f65e9838
commit
531ec8544b
|
@ -20,9 +20,7 @@ import scala.collection.JavaConverters;
|
|||
import scala.collection.convert.Wrappers;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public abstract class DedupLocalTestUtils {
|
||||
|
@ -71,12 +69,7 @@ public abstract class DedupLocalTestUtils {
|
|||
|
||||
List<String> vertexes = entities.toJavaRDD().map(r -> r.getAs("identifier").toString()).collect();
|
||||
|
||||
List<Node> nodes = vertexes
|
||||
.stream()
|
||||
.map(v -> new Node(v.substring(3, 20).replaceAll("_", ""), vertexes.indexOf(v), prepareTable(
|
||||
entities.toJavaRDD().filter(r -> r.getAs("identifier").toString().equals(v)).first()
|
||||
)))
|
||||
.collect(Collectors.toList());
|
||||
List<Node> nodes = entities.toJavaRDD().map(e -> new Node(e.getAs("identifier").toString(), vertexes.indexOf(e.getAs("identifier").toString()), prepareTable(e))).collect();
|
||||
|
||||
List<Edge> edges = simRels.toJavaRDD().collect().stream().map(sr -> new Edge(vertexes.indexOf(sr.getSource()), vertexes.indexOf(sr.getTarget()))).collect(Collectors.toList());
|
||||
|
||||
|
@ -137,7 +130,7 @@ public abstract class DedupLocalTestUtils {
|
|||
|
||||
}
|
||||
|
||||
class Node{
|
||||
class Node implements Serializable{
|
||||
String label;
|
||||
int id;
|
||||
String title;
|
||||
|
@ -173,7 +166,7 @@ class Node{
|
|||
}
|
||||
}
|
||||
|
||||
class Edge{
|
||||
class Edge implements Serializable{
|
||||
int from;
|
||||
int to;
|
||||
|
||||
|
|
|
@ -14,9 +14,6 @@ import eu.dnetlib.pace.config.DedupConfig;
|
|||
import eu.dnetlib.pace.model.SparkDeduper;
|
||||
import eu.dnetlib.pace.model.SparkModel;
|
||||
import eu.dnetlib.pace.tree.support.TreeProcessor;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.api.java.function.FlatMapGroupsFunction;
|
||||
|
@ -34,15 +31,12 @@ import scala.Tuple3;
|
|||
import scala.collection.JavaConversions;
|
||||
|
||||
import java.awt.*;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.*;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.*;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
|
@ -167,7 +161,7 @@ public class SparkDedupLocalTest extends DedupLocalTestUtils {
|
|||
|
||||
long before_dedupentity = System.currentTimeMillis();
|
||||
|
||||
final Class<OafEntity> clazz = ModelSupport.entityTypes.get(EntityType.valueOf(config.getWf().getSubEntityType()));
|
||||
final Class<OafEntity> clazz = ModelSupport.entityTypes.get(EntityType.valueOf(config.getWf().getSubEntityValue()));
|
||||
final Encoder<OafEntity> beanEncoder = Encoders.bean(clazz);
|
||||
final Encoder<OafEntity> kryoEncoder = Encoders.kryo(clazz);
|
||||
|
||||
|
@ -300,7 +294,7 @@ public class SparkDedupLocalTest extends DedupLocalTestUtils {
|
|||
entities,
|
||||
simRels,
|
||||
"/tmp/graph.html",
|
||||
Paths.get(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/visualization_tools/graph_template.html").toURI()).toFile().getAbsolutePath());
|
||||
Paths.get(Objects.requireNonNull(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/visualization_tools/graph_template.html")).toURI()).toFile().getAbsolutePath());
|
||||
Desktop.getDesktop().browse(new File("/tmp/graph.html").toURI());
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
|
|
Loading…
Reference in New Issue