implementation of graph visualization for local dedup test

This commit is contained in:
Michele De Bonis 2024-09-27 10:13:35 +02:00
parent 983b5e7b06
commit 52f65e9838
3 changed files with 280 additions and 379 deletions

View File

@ -1,55 +1,139 @@
package eu.dnetlib.dhp.oa.dedup.local;
import com.cloudera.com.fasterxml.jackson.core.JsonFactory;
import com.cloudera.com.fasterxml.jackson.databind.JsonNode;
import com.cloudera.com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.pace.config.DedupConfig;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.collections4.IteratorUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.spark_project.guava.hash.Hashing;
import scala.collection.JavaConverters;
import scala.collection.convert.Wrappers;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public abstract class DedupLocalTestUtils {
// public static String prepareTable(MapDocument doc) {
//
// String ret = "<table>";
//
// for(String fieldName: doc.getFieldMap().keySet()) {
// if (doc.getFieldMap().get(fieldName).getType().equals(Type.String)) {
// ret += "<tr><th>" + fieldName + "</th><td>" + doc.getFieldMap().get(fieldName).stringValue() + "</td></tr>";
// }
// else if (doc.getFieldMap().get(fieldName).getType().equals(Type.List)) {
// ret += "<tr><th>" + fieldName + "</th><td>[" + ((FieldListImpl)doc.getFieldMap().get(fieldName)).stringList().stream().collect(Collectors.joining(";")) + "]</td></tr>";
// }
// }
//
// return ret + "</table>";
//
// }
//
// public static void prepareGraphParams(List<String> vertexes, List<Tuple2<String, String>> edgesTuple, String filePath, String templateFilePath, Map<String, MapDocument> mapDocuments) {
//
// List<Node> nodes = vertexes.stream().map(v -> new Node(v.substring(3, 20).replaceAll("_", ""), vertexes.indexOf(v), prepareTable(mapDocuments.get(v)))).collect(Collectors.toList());
// List<Edge> edges = edgesTuple.stream().map(e -> new Edge(vertexes.indexOf(e._1()), vertexes.indexOf(e._2()))).collect(Collectors.toList());
//
// try(FileWriter fw = new FileWriter(filePath)) {
// String fullText = IOUtils.toString(new FileReader(templateFilePath));
//
// String s = fullText
// .replaceAll("%nodes%", new ObjectMapper().writeValueAsString(nodes))
// .replaceAll("%edges%", new ObjectMapper().writeValueAsString(edges));
//
// IOUtils.write(s, fw);
// } catch (IOException e) {
// e.printStackTrace();
// }
//
// }
//
// public static String getOrganizationLegalname(MapDocument mapDocument){
// return mapDocument.getFieldMap().get("legalname").stringValue();
// }
//
// public static String getJSONEntity(List<String> entities, String id){
//
// for (String entity: entities) {
// if(entity.contains(id))
// return entity;
// }
// return "";
// }
/**
 * Renders a row as an HTML {@code <table>} with one {@code <tr>} per schema field:
 * the field name goes into the {@code <th>} and the field value into the {@code <td>}.
 *
 * <ul>
 * <li>{@code null} values produce an empty cell (previously they caused a NullPointerException).</li>
 * <li>String values are written as-is.</li>
 * <li>Any other value is cast to {@code Wrappers.JListWrapper<String>} and written as
 * {@code [a;b;c]}. When the list's string form contains the text {@code value}, each element
 * is first passed through {@link #takeValue(String)}.</li>
 * </ul>
 *
 * NOTE(review): field names and values are NOT HTML-escaped; the result is only suitable for
 * trusted, local test data.
 *
 * @param doc the row to render
 * @return the HTML table markup
 * @throws ClassCastException if a non-null, non-String value is not a {@code JListWrapper}
 */
public static String prepareTable(Row doc) {
    StringBuilder ret = new StringBuilder("<table>");
    for (String fieldName : doc.schema().fieldNames()) {
        Object value = doc.getAs(fieldName);
        ret.append("<tr><th>").append(fieldName).append("</th><td>");
        if (value == null) {
            // Missing value: keep the row so the field is still listed, with an empty cell.
        } else if (value instanceof String) {
            ret.append(value);
        } else {
            List<String> strings = IteratorUtils.toList(JavaConverters.asJavaIteratorConverter(((Wrappers.JListWrapper<String>) value).iterator()).asJava());
            // Heuristic kept from the original: if the list mentions "value", treat every
            // element as a JSON object and show only its "value" attribute.
            List<String> cells = value.toString().contains("value")
                ? strings.stream().map(DedupLocalTestUtils::takeValue).collect(Collectors.toList())
                : strings;
            ret.append("[").append(String.join(";", cells)).append("]");
        }
        ret.append("</td></tr>");
    }
    ret.append("</table>");
    return ret.toString();
}
/**
 * Reads a file through the Hadoop {@link FileSystem} obtained from a default
 * {@link Configuration} and returns its content as a single string.
 *
 * <p>The file is decoded as UTF-8 (previously the platform default charset was used, which made
 * the result machine-dependent). Lines are concatenated WITHOUT any separator, so line
 * terminators are dropped from the result.
 *
 * @param filePath path of the file to read
 * @return the file content with all line terminators removed
 * @throws IOException if the file system cannot be obtained or the file cannot be opened or closed
 */
protected static String fileToString(String filePath) throws IOException {
    Path path = new Path(filePath);
    FileSystem fs = FileSystem.get(new Configuration());
    try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path), StandardCharsets.UTF_8))) {
        return br.lines().collect(Collectors.joining());
    }
}
/**
 * Builds the node and edge lists of the similarity graph and writes an HTML page by filling
 * the {@code %nodes%} and {@code %edges%} placeholders of a template file with their JSON form.
 *
 * <p>Every row of {@code entities} becomes a node: its label is derived from the row's
 * {@code identifier} column, its id is the position of the first row with that identifier, and
 * its title is the HTML table produced by {@link #prepareTable(Row)}. Every relation of
 * {@code simRels} becomes an edge between the node ids of its source and target; an endpoint
 * that matches no entity identifier is mapped to {@code -1}.
 *
 * <p>I/O failures are printed to standard error and otherwise ignored (best effort). The
 * template is read before the output file is opened, so a missing template does not truncate
 * an existing output file.
 *
 * @param entities         rows with at least an {@code identifier} column
 * @param simRels          similarity relations whose source/target are entity identifiers
 * @param filePath         path of the HTML file to write
 * @param templateFilePath path of the HTML template containing the placeholders
 */
public static void prepareGraphParams(Dataset<Row> entities, Dataset<Relation> simRels, String filePath, String templateFilePath) {
    // Collect once: the previous version launched one Spark filter job per vertex.
    final List<Row> rows = entities.toJavaRDD().collect();
    final List<String> vertexes = rows
        .stream()
        .map(r -> r.getAs("identifier").toString())
        .collect(Collectors.toList());

    // identifier -> index of its first occurrence (same result as List.indexOf, but O(1)).
    final Map<String, Integer> firstIndex = new HashMap<>();
    for (int i = 0; i < vertexes.size(); i++) {
        firstIndex.putIfAbsent(vertexes.get(i), i);
    }

    List<Node> nodes = vertexes
        .stream()
        .map(v -> {
            int idx = firstIndex.get(v);
            return new Node(shortLabel(v), idx, prepareTable(rows.get(idx)));
        })
        .collect(Collectors.toList());

    List<Edge> edges = simRels
        .toJavaRDD()
        .collect()
        .stream()
        .map(sr -> new Edge(firstIndex.getOrDefault(sr.getSource(), -1), firstIndex.getOrDefault(sr.getTarget(), -1)))
        .collect(Collectors.toList());

    try {
        final String fullText;
        try (FileReader templateReader = new FileReader(templateFilePath)) {
            fullText = IOUtils.toString(templateReader);
        }

        final ObjectMapper mapper = new ObjectMapper();
        // String.replace is literal; replaceAll would interpret '\' and '$' inside the JSON
        // as regex-replacement syntax and corrupt escaped characters or throw.
        String s = fullText
            .replace("%nodes%", mapper.writeValueAsString(nodes))
            .replace("%edges%", mapper.writeValueAsString(edges));

        try (FileWriter fw = new FileWriter(filePath)) {
            IOUtils.write(s, fw);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/** Derives a node label from characters 3..19 of the identifier (clamped to its length), with underscores removed. */
private static String shortLabel(String identifier) {
    int end = Math.min(20, identifier.length());
    int start = Math.min(3, end);
    return identifier.substring(start, end).replace("_", "");
}
/**
 * Computes a 64-bit hash of the given identifier using the 128-bit Murmur3 hash function
 * and taking its {@code long} view.
 *
 * @param id the identifier to hash
 * @return the hash as a {@code long}
 */
public static long hash(final String id) {
    return Hashing
        .murmur3_128()
        .hashString(id)
        .asLong();
}
/**
 * Creates a dedup relation between two identifiers.
 *
 * <p>The relation type is the entity type from the dedup configuration followed by the same
 * entity type with its first character upper-cased; the sub-relation type is
 * {@code ModelConstants.DEDUP}.
 *
 * @param source    identifier set as the relation source
 * @param target    identifier set as the relation target
 * @param relClass  relation class to set
 * @param dedupConf configuration providing the entity type
 * @return the populated relation
 */
public static Relation createRel(String source, String target, String relClass, DedupConfig dedupConf) {
    final String type = dedupConf.getWf().getEntityType();

    final Relation rel = new Relation();
    rel.setSource(source);
    rel.setTarget(target);
    rel.setRelClass(relClass);

    final String capitalizedType = type.substring(0, 1).toUpperCase() + type.substring(1);
    rel.setRelType(type + capitalizedType);
    rel.setSubRelType(ModelConstants.DEDUP);
    return rel;
}
/**
 * Clones {@code base} with {@code BeanUtils.cloneBean} and overrides the id and the
 * last-update timestamp of the copy.
 *
 * @param id   id to set on the copy
 * @param base entity to clone
 * @param ts   last-update timestamp to set on the copy
 * @return the modified copy
 * @throws RuntimeException wrapping any exception raised while cloning or updating the copy
 */
public static OafEntity createOafEntity(String id, OafEntity base, long ts) {
    try {
        final OafEntity copy = (OafEntity) BeanUtils.cloneBean(base);
        copy.setId(id);
        copy.setLastupdatetimestamp(ts);
        return copy;
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}
/**
 * Extracts the {@code value} attribute from a JSON object string.
 *
 * <p>The attribute's JSON text is returned with every double-quote character removed.
 * Any failure (unparsable input, missing {@code value} attribute, ...) yields an empty string.
 *
 * @param json the JSON text to parse
 * @return the unquoted text of the {@code value} attribute, or {@code ""} on any error
 */
public static String takeValue(String json) {
    try {
        final JsonNode valueNode = new ObjectMapper(new JsonFactory())
            .readTree(json)
            .get("value");
        return valueNode.toString().replaceAll("\"", "");
    } catch (Exception ignored) {
        // Best effort: anything that cannot be read is rendered as an empty value.
        return "";
    }
}
}

View File

@ -0,0 +1,70 @@
<html>
<head>
<script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
<style type="text/css">
#mynetwork {
width: 1000px;
height: 700px;
border: 1px solid lightgray;
}
th, td {
font-size: 10px;
}
</style>
</head>
<body>
<div id="mynetwork"></div>
<script type="text/javascript">
    // Wraps an HTML string in a detached <div> so that a node title can be a DOM element
    // instead of a plain string.
    // NOTE: innerHTML does not sanitize its input, so this is only safe for trusted data.
    function htmlTitle(html) {
        const container = document.createElement("div");
        container.innerHTML = html;
        return container;
    }

    // %nodes% and %edges% are placeholders that are replaced with JSON arrays
    // when the page is generated from this template.
    var nodesArray = %nodes%;
    var edgesArray = %edges%;

    // turn every node's HTML title string into a DOM element
    for (var i = 0; i < nodesArray.length; i++) {
        nodesArray[i].title = htmlTitle(nodesArray[i].title);
    }

    // create the vis data sets for nodes and edges
    var nodes = new vis.DataSet(nodesArray);
    var edges = new vis.DataSet(edgesArray);

    // create a network
    var container = document.getElementById('mynetwork');

    // provide the data in the vis format
    var data = {
        nodes: nodes,
        edges: edges
    };
    var options = {
        physics: {enabled: false},
        edges: {physics: false},
        nodes: {font: {size: 10}},
        layout: {improvedLayout: true}
    };

    // initialize your network!
    var network = new vis.Network(container, data, options);
</script>
</body>
</html>