forked from antonis.lempesis/dnet-hadoop
fix wrong relation with source null
This commit is contained in:
parent
0fe40b08e4
commit
098914dcff
|
@ -38,37 +38,39 @@ object SparkGenerateDOIBoostActionSet {
|
||||||
val crossRefRelation = parser.get("crossRefRelation")
|
val crossRefRelation = parser.get("crossRefRelation")
|
||||||
val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath")
|
val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath")
|
||||||
val dbOrganizationPath = parser.get("dbOrganizationPath")
|
val dbOrganizationPath = parser.get("dbOrganizationPath")
|
||||||
val workingDirPath = parser.get("targetPath")
|
|
||||||
val sequenceFilePath = parser.get("sFilePath")
|
val sequenceFilePath = parser.get("sFilePath")
|
||||||
|
|
||||||
val asDataset = spark.read.load(dbDatasetPath).as[OafDataset]
|
val asDataset = spark.read.load(dbDatasetPath).as[OafDataset]
|
||||||
|
// Keep only non-null records that carry an id. `&&` short-circuits, so getId is
// never invoked on a null record (the previous `||` let every non-null record
// through and dereferenced null ones).
.filter(p => p != null && p.getId != null)
|
||||||
.map(d =>DoiBoostMappingUtil.fixResult(d))
|
.map(d =>DoiBoostMappingUtil.fixResult(d))
|
||||||
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
||||||
// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/actionSet")
|
|
||||||
|
|
||||||
val asPublication =spark.read.load(dbPublicationPath).as[Publication]
|
val asPublication =spark.read.load(dbPublicationPath).as[Publication]
|
||||||
|
// Keep only non-null publications that carry an id. `&&` short-circuits, so getId
// is never invoked on a null record (the previous `||` accepted every non-null
// record and dereferenced null ones).
.filter(p => p != null && p.getId != null)
|
||||||
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
||||||
// .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
|
|
||||||
|
|
||||||
val asOrganization = spark.read.load(dbOrganizationPath).as[Organization]
|
val asOrganization = spark.read.load(dbOrganizationPath).as[Organization]
|
||||||
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
||||||
// .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
|
|
||||||
|
|
||||||
|
|
||||||
val asCRelation = spark.read.load(crossRefRelation).as[Relation]
|
val asCRelation = spark.read.load(crossRefRelation).as[Relation]
|
||||||
|
// Drop null relations and relations missing either endpoint. All three checks must
// hold, hence `&&`: with `||` any non-null relation passed even when its source or
// target was null, and a null relation was dereferenced.
.filter(r => r != null && r.getSource != null && r.getTarget != null)
|
||||||
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
||||||
// .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
|
|
||||||
|
|
||||||
val asRelAffiliation = spark.read.load(dbaffiliationRelationPath).as[Relation]
|
val asRelAffiliation = spark.read.load(dbaffiliationRelationPath).as[Relation]
|
||||||
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
||||||
// .write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val d: Dataset[(String, String)] = asDataset.union(asPublication).union(asOrganization).union(asCRelation).union(asRelAffiliation)
|
val d: Dataset[(String, String)] = asDataset.union(asPublication).union(asOrganization).union(asCRelation).union(asRelAffiliation)
|
||||||
|
|
||||||
// spark.read.load(s"$workingDirPath/actionSet").as[(String,String)]
|
|
||||||
|
|
||||||
d.rdd.repartition(6000).map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$sequenceFilePath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec])
|
d.rdd.repartition(6000).map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$sequenceFilePath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec])
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ import scala.collection.JavaConverters._
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
import scala.util.matching.Regex
|
import scala.util.matching.Regex
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils;
|
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils
|
||||||
|
|
||||||
case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
|
/** Immutable record pairing a DOI with a JSON payload and a timestamp.
  *
  * @param doi       the DOI string identifying the record
  * @param json      JSON text associated with the DOI (presumably the serialized Crossref record; confirm against callers)
  * @param timestamp numeric timestamp for the record (unit and origin are not visible here; confirm against callers)
  */
case class CrossrefDT(
  doi: String,
  json: String,
  timestamp: Long
)
|
||||||
|
|
||||||
|
@ -182,7 +182,7 @@ case object Crossref2Oaf {
|
||||||
// Ticket #6281 added pid to Instance
|
// Ticket #6281 added pid to Instance
|
||||||
instance.setPid(result.getPid)
|
instance.setPid(result.getPid)
|
||||||
|
|
||||||
val has_review = (json \ "relation" \"has-review" \ "id")
|
val has_review = json \ "relation" \"has-review" \ "id"
|
||||||
|
|
||||||
if(has_review != JNothing) {
|
if(has_review != JNothing) {
|
||||||
instance.setRefereed(
|
instance.setRefereed(
|
||||||
|
@ -208,7 +208,8 @@ case object Crossref2Oaf {
|
||||||
instance.setUrl(links.asJava)
|
instance.setUrl(links.asJava)
|
||||||
result.setId(IdentifierFactory.createDOIBoostIdentifier(result))
|
result.setId(IdentifierFactory.createDOIBoostIdentifier(result))
|
||||||
if (result.getId== null)
|
if (result.getId== null)
|
||||||
return null
|
null
|
||||||
|
else
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -241,9 +242,9 @@ case object Crossref2Oaf {
|
||||||
val result = generateItemFromType(objectType, objectSubType)
|
val result = generateItemFromType(objectType, objectSubType)
|
||||||
if (result == null)
|
if (result == null)
|
||||||
return List()
|
return List()
|
||||||
val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type"));
|
val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type"))
|
||||||
mappingResult(result, json, cOBJCategory)
|
mappingResult(result, json, cOBJCategory)
|
||||||
if (result == null)
|
if (result == null || result.getId == null)
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue