67 lines
1.8 KiB
Java
67 lines
1.8 KiB
Java
|
|
package eu.dnetlib.dhp.oa.graph.dump.skgif;
|
|
|
|
import java.io.Serializable;
|
|
import java.util.List;
|
|
import java.util.Optional;
|
|
|
|
import org.apache.spark.api.java.function.MapFunction;
|
|
import org.apache.spark.sql.Dataset;
|
|
import org.apache.spark.sql.Encoders;
|
|
import org.apache.spark.sql.SparkSession;
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
import scala.Tuple2;
|
|
|
|
/**
|
|
* @author miriam.baglioni
|
|
* @Date 16/02/24
|
|
*/
|
|
public class Utils implements Serializable {
|
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
|
|
private Utils() {
|
|
}
|
|
|
|
public static void removeOutputDir(SparkSession spark, String path) {
|
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
|
}
|
|
|
|
public static <R> Dataset<R> readPath(
|
|
SparkSession spark, String inputPath, Class<R> clazz) {
|
|
return spark
|
|
.read()
|
|
.textFile(inputPath)
|
|
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
|
}
|
|
|
|
public static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
|
|
if (!Optional.ofNullable(pid).isPresent())
|
|
return null;
|
|
if (pid.size() == 0)
|
|
return null;
|
|
for (StructuredProperty p : pid) {
|
|
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
|
|
return new Tuple2<>(p.getValue(), Boolean.TRUE);
|
|
}
|
|
}
|
|
for (StructuredProperty p : pid) {
|
|
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
|
|
return new Tuple2<>(p.getValue(), Boolean.FALSE);
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public static String getIdentifier(Prefixes entity, String id) {
|
|
return entity.label + DHPUtils.md5(id);
|
|
|
|
}
|
|
}
|