167 lines
5.2 KiB
Java
167 lines
5.2 KiB
Java
|
|
package eu.dnetlib.dhp.oa.graph.dump.skgif;
|
|
|
|
import java.io.Serializable;
|
|
import java.io.StringReader;
|
|
import java.util.List;
|
|
import java.util.Optional;
|
|
|
|
import org.apache.spark.api.java.function.MapFunction;
|
|
import org.apache.spark.sql.Dataset;
|
|
import org.apache.spark.sql.Encoders;
|
|
import org.apache.spark.sql.SparkSession;
|
|
import org.dom4j.Document;
|
|
import org.dom4j.DocumentException;
|
|
import org.dom4j.io.SAXReader;
|
|
|
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
|
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
|
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
|
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
import scala.Tuple2;
|
|
|
|
/**
|
|
* @author miriam.baglioni
|
|
* @Date 16/02/24
|
|
*/
|
|
public class Utils implements Serializable {
|
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
|
|
private Utils() {
|
|
}
|
|
|
|
public static void removeOutputDir(SparkSession spark, String path) {
|
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
|
}
|
|
|
|
public static <R> Dataset<R> readPath(
|
|
SparkSession spark, String inputPath, Class<R> clazz) {
|
|
return spark
|
|
.read()
|
|
.textFile(inputPath)
|
|
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
|
}
|
|
|
|
public static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
|
|
if (!Optional.ofNullable(pid).isPresent())
|
|
return null;
|
|
if (pid.size() == 0)
|
|
return null;
|
|
for (StructuredProperty p : pid) {
|
|
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
|
|
return new Tuple2<>(p.getValue(), Boolean.TRUE);
|
|
}
|
|
}
|
|
for (StructuredProperty p : pid) {
|
|
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
|
|
return new Tuple2<>(p.getValue(), Boolean.FALSE);
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public static String getIdentifier(Prefixes entity, String id) {
|
|
return entity.label + DHPUtils.md5(id);
|
|
|
|
}
|
|
|
|
public static String getFunderName(String fundingtree) throws DocumentException {
|
|
final Document doc;
|
|
|
|
doc = new SAXReader().read(new StringReader(fundingtree));
|
|
// f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
|
return ((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText();
|
|
// f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
|
|
|
}
|
|
|
|
public static MinOrganization getMinOrganization(Organization o) {
|
|
MinOrganization mo = new MinOrganization();
|
|
// mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
|
|
mo.setLocal_identifier(o.getId());
|
|
if (Optional.ofNullable(o.getLegalname()).isPresent())
|
|
mo.setName(o.getLegalname().getValue());
|
|
if (Optional.ofNullable(o.getPid()).isPresent())
|
|
for (StructuredProperty pid : o.getPid()) {
|
|
if (Optional.ofNullable(pid.getQualifier()).isPresent() &&
|
|
Optional.ofNullable(pid.getQualifier().getClassid()).isPresent())
|
|
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
|
case "ror":
|
|
mo.setRor(pid.getValue());
|
|
break;
|
|
case "isni":
|
|
mo.setIsni(pid.getValue());
|
|
break;
|
|
case "fundref":
|
|
mo.setFundRef(pid.getValue());
|
|
break;
|
|
case "ringgold":
|
|
mo.setRinGold(pid.getValue());
|
|
break;
|
|
case "wikidata":
|
|
mo.setWikidata(pid.getValue());
|
|
break;
|
|
|
|
}
|
|
}
|
|
return mo;
|
|
}
|
|
|
|
public static MinGrant getMinGrant(Project p) throws DocumentException {
|
|
MinGrant mg = new MinGrant();
|
|
// mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
|
|
mg.setLocal_identifier(p.getId());
|
|
if (Optional.ofNullable(p.getCode()).isPresent())
|
|
mg.setCode(p.getCode().getValue());
|
|
if (Optional.ofNullable(p.getFundingtree()).isPresent() && p.getFundingtree().size() > 0)
|
|
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
|
|
if (Optional.ofNullable(p.getAcronym()).isPresent())
|
|
mg.setTitle(p.getAcronym().getValue());
|
|
else if (Optional.ofNullable(p.getTitle()).isPresent()) {
|
|
mg.setTitle(p.getTitle().getValue());
|
|
|
|
}
|
|
return mg;
|
|
}
|
|
|
|
public static <R extends Result> MinProduct getMinProduct(R r) throws JsonProcessingException {
|
|
MinProduct mp = new MinProduct();
|
|
// mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
|
|
mp.setLocal_identifier(r.getId());
|
|
for (StructuredProperty title : r.getTitle()) {
|
|
if (title.getQualifier().getClassid().equalsIgnoreCase("main title")) {
|
|
mp.setTitle(title.getValue());
|
|
}
|
|
}
|
|
if (r.getPid() != null)
|
|
for (StructuredProperty pid : r.getPid()) {
|
|
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
|
case "doi":
|
|
mp.setDoi(pid.getValue());
|
|
break;
|
|
case "pmcid":
|
|
mp.setPmcid(pid.getValue());
|
|
break;
|
|
case "arxiv":
|
|
mp.setArxivid(pid.getValue());
|
|
break;
|
|
case "pmid":
|
|
mp.setPmid(pid.getValue());
|
|
break;
|
|
}
|
|
}
|
|
|
|
return mp;
|
|
}
|
|
}
|