[aggregator graph] save invalid records aside for further inspection

discard-non-wellformed
Claudio Atzori 2 years ago
parent a0919ed495
commit c48f6e9c57

@@ -143,7 +143,7 @@ public abstract class AbstractMdRecordToOafMapper {
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
} catch (DocumentException e) {
log.error("Error with record:\n" + xml);
return Lists.newArrayList();
return null;
}
}

@@ -126,8 +126,6 @@ public class GenerateEntitiesApplication {
log.info("Generate entities from files:");
existingSourcePaths.forEach(log::info);
JavaRDD<Oaf> inputRdd = sc.emptyRDD();
for (final String sp : existingSourcePaths) {
RDD<String> invalidRecords = sc
.sequenceFile(sp, Text.class, Text.class)
@@ -141,7 +139,11 @@ public class GenerateEntitiesApplication {
.mode(SaveMode.Append)
.option("compression", "gzip")
.text(invalidPath);
}
JavaRDD<Oaf> inputRdd = sc.emptyRDD();
for (final String sp : existingSourcePaths) {
inputRdd = inputRdd
.union(
sc
@@ -223,7 +225,7 @@ public class GenerateEntitiesApplication {
final boolean shouldHashId,
final VocabularyGroup vocs) {
if (convertToListOaf(id, s, shouldHashId, vocs).isEmpty()) {
if (Objects.isNull(convertToListOaf(id, s, shouldHashId, vocs))) {
return s;
}
return null;

Loading…
Cancel
Save