forked from antonis.lempesis/dnet-hadoop
fixed error on empty intersection with publication and relation on export to OAF
This commit is contained in:
parent
eec418cd26
commit
734934e2eb
|
@ -1,4 +1,6 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.merge;
|
package eu.dnetlib.dhp.oa.merge;
|
||||||
|
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
|
@ -1,12 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.dedup;
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
@ -19,6 +17,7 @@ import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
|
@ -10,11 +10,11 @@ import java.io.Serializable;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
|
|
||||||
import org.codehaus.jackson.map.ObjectMapper;
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.merge.AuthorMerger;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
|
@ -272,30 +272,11 @@ object DLIToOAF {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
  * Rewrites the source and target identifiers of a relation in place, inserting the
  * `scholix_____::` namespace right after the first literal `50|` and the first
  * literal `60|` found in each identifier.
  *
  * @param r the relation to update; it is mutated and returned
  * @return the same `Relation` instance, with rewritten source and target
  */
def convertDLIRelation(r: Relation): Relation = {
  // String.replaceFirst interprets its first argument as a regex: an unescaped `|` is
  // alternation, so "50|" means "50 OR empty string". That leaves a stray pipe behind on
  // 50| ids and makes the "60|" call insert its replacement at index 0 of every id.
  // Escaping the pipe makes both matches literal.
  def addScholixNamespace(id: String): String =
    id.replaceFirst("50\\|", "50|scholix_____::").replaceFirst("60\\|", "60|scholix_____::")

  r.setSource(addScholixNamespace(r.getSource))
  r.setTarget(addScholixNamespace(r.getTarget))
  r
}
|
||||||
// return null
|
|
||||||
//
|
|
||||||
// if (r.getProperties == null || r.getProperties.size() == 0 || (r.getProperties.size() == 1 && r.getProperties.get(0) == null))
|
|
||||||
// return null
|
|
||||||
// val t = relationTypeMapping.get(r.getRelType)
|
|
||||||
//
|
|
||||||
// result.setRelType("resultResult")
|
|
||||||
// result.setRelClass(t.get._1)
|
|
||||||
// result.setSubRelType(t.get._2)
|
|
||||||
// result.setCollectedfrom(r.getProperties.asScala.map(c => collectedFromMap.getOrElse(c.getKey, null)).filter(p => p != null).asJava)
|
|
||||||
// result.setSource(generateId(r.getSource))
|
|
||||||
// result.setTarget(generateId(r.getTarget))
|
|
||||||
//
|
|
||||||
// if (result.getSource.equals(result.getTarget))
|
|
||||||
// return null
|
|
||||||
// result.setDataInfo(generateDataInfo())
|
|
||||||
//
|
|
||||||
// result
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
def convertDLIDatasetTOOAF(d: DLIDataset): Dataset = {
|
def convertDLIDatasetTOOAF(d: DLIDataset): Dataset = {
|
||||||
|
|
|
@ -44,7 +44,7 @@ object SparkExportContentForOpenAire {
|
||||||
|
|
||||||
|
|
||||||
val dsRel = spark.read.load(s"$workingPath/relation_b").as[Relation]
|
val dsRel = spark.read.load(s"$workingPath/relation_b").as[Relation]
|
||||||
dsRel.filter(r => r.getDataInfo==null || r.getDataInfo.getDeletedbyinference ==false).write.mode(SaveMode.Overwrite).save(s"$workingPath/export/relationDS")
|
dsRel.filter(r => r.getDataInfo==null || r.getDataInfo.getDeletedbyinference ==false).map(DLIToOAF.convertDLIRelation).write.mode(SaveMode.Overwrite).save(s"$workingPath/export/relationDS")
|
||||||
|
|
||||||
|
|
||||||
val dsPubs = spark.read.load(s"$workingPath/publication").as[DLIPublication]
|
val dsPubs = spark.read.load(s"$workingPath/publication").as[DLIPublication]
|
||||||
|
|
Loading…
Reference in New Issue