Minor changes to fix and optimize the local dedup tests

This commit is contained in:
Michele De Bonis 2024-09-27 10:51:55 +02:00
parent 52f65e9838
commit 531ec8544b
2 changed files with 6 additions and 19 deletions

View File

@@ -20,9 +20,7 @@ import scala.collection.JavaConverters;
import scala.collection.convert.Wrappers;
import java.io.*;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public abstract class DedupLocalTestUtils {
@@ -71,12 +69,7 @@ public abstract class DedupLocalTestUtils {
List<String> vertexes = entities.toJavaRDD().map(r -> r.getAs("identifier").toString()).collect();
List<Node> nodes = vertexes
.stream()
.map(v -> new Node(v.substring(3, 20).replaceAll("_", ""), vertexes.indexOf(v), prepareTable(
entities.toJavaRDD().filter(r -> r.getAs("identifier").toString().equals(v)).first()
)))
.collect(Collectors.toList());
List<Node> nodes = entities.toJavaRDD().map(e -> new Node(e.getAs("identifier").toString(), vertexes.indexOf(e.getAs("identifier").toString()), prepareTable(e))).collect();
List<Edge> edges = simRels.toJavaRDD().collect().stream().map(sr -> new Edge(vertexes.indexOf(sr.getSource()), vertexes.indexOf(sr.getTarget()))).collect(Collectors.toList());
@@ -137,7 +130,7 @@ public abstract class DedupLocalTestUtils {
}
class Node{
class Node implements Serializable{
String label;
int id;
String title;
@@ -173,7 +166,7 @@ class Node{
}
}
class Edge{
class Edge implements Serializable{
int from;
int to;

View File

@@ -14,9 +14,6 @@ import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.SparkDeduper;
import eu.dnetlib.pace.model.SparkModel;
import eu.dnetlib.pace.tree.support.TreeProcessor;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.FlatMapGroupsFunction;
@@ -34,15 +31,12 @@ import scala.Tuple3;
import scala.collection.JavaConversions;
import java.awt.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.*;
import java.util.List;
import java.util.stream.Collectors;
import java.util.*;
import java.util.stream.Stream;
@ExtendWith(MockitoExtension.class)
@@ -167,7 +161,7 @@ public class SparkDedupLocalTest extends DedupLocalTestUtils {
long before_dedupentity = System.currentTimeMillis();
final Class<OafEntity> clazz = ModelSupport.entityTypes.get(EntityType.valueOf(config.getWf().getSubEntityType()));
final Class<OafEntity> clazz = ModelSupport.entityTypes.get(EntityType.valueOf(config.getWf().getSubEntityValue()));
final Encoder<OafEntity> beanEncoder = Encoders.bean(clazz);
final Encoder<OafEntity> kryoEncoder = Encoders.kryo(clazz);
@@ -300,7 +294,7 @@ public class SparkDedupLocalTest extends DedupLocalTestUtils {
entities,
simRels,
"/tmp/graph.html",
Paths.get(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/visualization_tools/graph_template.html").toURI()).toFile().getAbsolutePath());
Paths.get(Objects.requireNonNull(SparkDedupLocalTest.class.getResource("/eu/dnetlib/dhp/dedup/visualization_tools/graph_template.html")).toURI()).toFile().getAbsolutePath());
Desktop.getDesktop().browse(new File("/tmp/graph.html").toURI());
} catch (Exception e) {
e.printStackTrace();