package eu.dnetlib.dhp.oa.graph.dump.skgif; import java.io.Serializable; import java.util.List; import java.util.Optional; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.skgif.model.Prefixes; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; /** * @author miriam.baglioni * @Date 16/02/24 */ public class Utils implements Serializable { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private Utils() { } public static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } public static Dataset readPath( SparkSession spark, String inputPath, Class clazz) { return spark .read() .textFile(inputPath) .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } public static Tuple2 getOrcid(List pid) { if (!Optional.ofNullable(pid).isPresent()) return null; if (pid.size() == 0) return null; for (StructuredProperty p : pid) { if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) { return new Tuple2<>(p.getValue(), Boolean.TRUE); } } for (StructuredProperty p : pid) { if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) { return new Tuple2<>(p.getValue(), Boolean.FALSE); } } return null; } public static String getIdentifier(Prefixes entity, String id) { return entity.label + DHPUtils.md5(id); } }