[Aggregator graph|master] Discard invalid records #245

Merged
claudio.atzori merged 6 commits from discard-non-wellformed into master 2022-09-19 09:48:20 +02:00
2 changed files with 6 additions and 4 deletions
Showing only changes of commit c48f6e9c57 - Show all commits

View File

@@ -143,7 +143,7 @@ public abstract class AbstractMdRecordToOafMapper {
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
} catch (DocumentException e) {
log.error("Error with record:\n" + xml);
- return Lists.newArrayList();
+ return null;
}
}

View File

@@ -126,8 +126,6 @@ public class GenerateEntitiesApplication {
log.info("Generate entities from files:");
existingSourcePaths.forEach(log::info);
- JavaRDD<Oaf> inputRdd = sc.emptyRDD();
for (final String sp : existingSourcePaths) {
RDD<String> invalidRecords = sc
.sequenceFile(sp, Text.class, Text.class)
@@ -141,7 +139,11 @@ public class GenerateEntitiesApplication {
.mode(SaveMode.Append)
.option("compression", "gzip")
.text(invalidPath);
+ }
+ JavaRDD<Oaf> inputRdd = sc.emptyRDD();
+ for (final String sp : existingSourcePaths) {
inputRdd = inputRdd
.union(
sc
@@ -223,7 +225,7 @@ public class GenerateEntitiesApplication {
final boolean shouldHashId,
final VocabularyGroup vocs) {
- if (convertToListOaf(id, s, shouldHashId, vocs).isEmpty()) {
+ if (Objects.isNull(convertToListOaf(id, s, shouldHashId, vocs))) {
return s;
}
return null;