forked from D-Net/dnet-hadoop
[aggregator graph] save invalid records aside for further inspection
This commit is contained in:
parent
a0919ed495
commit
c48f6e9c57
|
@@ -143,7 +143,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
} catch (DocumentException e) {
|
||||
log.error("Error with record:\n" + xml);
|
||||
return Lists.newArrayList();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -126,8 +126,6 @@ public class GenerateEntitiesApplication {
|
|||
log.info("Generate entities from files:");
|
||||
existingSourcePaths.forEach(log::info);
|
||||
|
||||
JavaRDD<Oaf> inputRdd = sc.emptyRDD();
|
||||
|
||||
for (final String sp : existingSourcePaths) {
|
||||
RDD<String> invalidRecords = sc
|
||||
.sequenceFile(sp, Text.class, Text.class)
|
||||
|
@@ -141,7 +139,11 @@ public class GenerateEntitiesApplication {
|
|||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.text(invalidPath);
|
||||
}
|
||||
|
||||
JavaRDD<Oaf> inputRdd = sc.emptyRDD();
|
||||
|
||||
for (final String sp : existingSourcePaths) {
|
||||
inputRdd = inputRdd
|
||||
.union(
|
||||
sc
|
||||
|
@@ -223,7 +225,7 @@ public class GenerateEntitiesApplication {
|
|||
final boolean shouldHashId,
|
||||
final VocabularyGroup vocs) {
|
||||
|
||||
if (convertToListOaf(id, s, shouldHashId, vocs).isEmpty()) {
|
||||
if (Objects.isNull(convertToListOaf(id, s, shouldHashId, vocs))) {
|
||||
return s;
|
||||
}
|
||||
return null;
|
||||
|
|
Loading…
Reference in New Issue