forked from antonis.lempesis/dnet-hadoop
Added a check to verify that the entity path exists
This commit is contained in:
parent
62ae36a3d2
commit
058b636d4d
|
@ -23,6 +23,7 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
@ -77,17 +78,22 @@ public class SparkUpdateEntity extends AbstractSparkAction {
|
|||
(type, clazz) -> {
|
||||
final String outputPath = dedupGraphPath + "/" + type;
|
||||
removeOutputDir(spark, outputPath);
|
||||
|
||||
final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString());
|
||||
if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) {
|
||||
JavaRDD<String> sourceEntity = sc
|
||||
.textFile(DedupUtility.createEntityPath(graphBasePath, type.toString()));
|
||||
|
||||
if (mergeRelExists(workingPath, type.toString())) {
|
||||
|
||||
final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, "*", type.toString());
|
||||
final String mergeRelPath = DedupUtility
|
||||
.createMergeRelPath(workingPath, "*", type.toString());
|
||||
final String dedupRecordPath = DedupUtility
|
||||
.createDedupRecordPath(workingPath, "*", type.toString());
|
||||
|
||||
final Dataset<Relation> rel = spark.read().load(mergeRelPath).as(Encoders.bean(Relation.class));
|
||||
final Dataset<Relation> rel = spark
|
||||
.read()
|
||||
.load(mergeRelPath)
|
||||
.as(Encoders.bean(Relation.class));
|
||||
|
||||
final JavaPairRDD<String, String> mergedIds = rel
|
||||
.where("relClass == 'merges'")
|
||||
|
@ -119,6 +125,7 @@ public class SparkUpdateEntity extends AbstractSparkAction {
|
|||
}
|
||||
|
||||
sourceEntity.saveAsTextFile(outputPath, GzipCodec.class);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue