dump #50

Merged
claudio.atzori merged 98 commits from miriam.baglioni/dnet-hadoop:dump into master 2020-11-04 18:07:01 +01:00
1 changed file with 43 additions and 0 deletions
Showing only changes of commit 5fb2949cb8 - Show all commits

View File

@ -4,6 +4,9 @@ package eu.dnetlib.dhp.oa.graph.dump;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -11,6 +14,9 @@ import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
@ -18,6 +24,10 @@ import com.google.gson.Gson;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.graph.Constants;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
import eu.dnetlib.dhp.schema.dump.pidgraph.Entity;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -70,4 +80,37 @@ public class Utils {
return new Gson().fromJson(sb.toString(), CommunityMap.class);
}
/**
 * Builds the two relations connecting a pair of nodes, one for each direction.
 * <p>
 * The first relation goes from the node built from ({@code pid1}, {@code type1}) to the node built
 * from ({@code pid2}, {@code type2}) and carries {@code rel1}; the second relation swaps the two
 * nodes and carries {@code rel2}. Both relation types are created with the same {@code semtype},
 * and the last argument of {@code Relation.newInstance} is passed as {@code null} in both cases.
 *
 * @param pid1 identifier used to build the first node
 * @param pid2 identifier used to build the second node
 * @param type1 type used to build the first node
 * @param type2 type used to build the second node
 * @param semtype second argument handed to {@code RelType.newInstance} for both relations
 * @param rel1 first argument handed to {@code RelType.newInstance} for the first-to-second relation
 * @param rel2 first argument handed to {@code RelType.newInstance} for the second-to-first relation
 * @return a new mutable list holding the first-to-second relation followed by the second-to-first one
 */
public static List<Relation> getRelationPair(String pid1, String pid2, String type1, String type2,
	String semtype, String rel1, String rel2) {
	final List<Relation> pair = new ArrayList<>();

	// Direction 1: (pid1, type1) -> (pid2, type2), labelled with rel1.
	final Node forwardSource = Node.newInstance(pid1, type1);
	final Node forwardTarget = Node.newInstance(pid2, type2);
	final RelType forwardType = RelType.newInstance(rel1, semtype);
	pair.add(Relation.newInstance(forwardSource, forwardTarget, forwardType, null));

	// Direction 2: (pid2, type2) -> (pid1, type1), labelled with rel2.
	// Fresh node instances are created so the two relations do not share objects.
	final Node backwardSource = Node.newInstance(pid2, type2);
	final Node backwardTarget = Node.newInstance(pid1, type1);
	final RelType backwardType = RelType.newInstance(rel2, semtype);
	pair.add(Relation.newInstance(backwardSource, backwardTarget, backwardType, null));

	return pair;
}
/**
 * Builds an {@link Entity} whose value is {@code <shortname>:<code>}, where the short name is the
 * text of the first {@code //funder/shortname} element found in the given XML.
 *
 * @param fund XML text expected to contain a {@code funder} element with a {@code shortname} child
 * @param code value appended after the colon
 * @return the entity created via {@code Entity.newInstance(shortname + ":" + code)}
 * @throws DocumentException if {@code fund} cannot be parsed as XML, or if it contains no
 *         {@code //funder/shortname} element
 */
public static Entity getEntity(String fund, String code) throws DocumentException {
	// NOTE(review): SAXReader is used with its default configuration; if fund can come from an
	// untrusted source, consider disabling DTDs/external entities — confirm with the callers.
	final Document doc = new SAXReader().read(new StringReader(fund));

	// selectSingleNode yields the first match (or null), so a missing element is reported
	// explicitly instead of failing with an IndexOutOfBoundsException on an empty result list.
	// The fully-qualified name is required: the simple name Node is bound to the dump schema class.
	final org.dom4j.Node shortName = doc.selectSingleNode("//funder/shortname");
	if (shortName == null) {
		throw new DocumentException("No //funder/shortname element found in the funding XML");
	}

	return Entity.newInstance(shortName.getText() + ":" + code);
}
}