fixed path reports
This commit is contained in:
parent
8d85c1e97e
commit
e254720377
|
@ -48,18 +48,17 @@ public class BaseAnalyzerJob {
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
final String jsonConfiguration = IOUtils
|
final String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(BaseAnalyzerJob.class
|
||||||
BaseAnalyzerJob.class
|
.getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/action_set_parameters.json"));
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/action_set_parameters.json"));
|
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final Boolean isSparkSessionManaged = Optional
|
final Boolean isSparkSessionManaged = Optional
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
@ -75,11 +74,11 @@ public class BaseAnalyzerJob {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void processBaseRecords(final SparkSession spark,
|
private static void processBaseRecords(final SparkSession spark,
|
||||||
final String inputPath,
|
final String inputPath,
|
||||||
final String outputPath) throws IOException {
|
final String outputPath) throws IOException {
|
||||||
|
|
||||||
try (final FileSystem fs = FileSystem.get(new Configuration());
|
try (final FileSystem fs = FileSystem.get(new Configuration());
|
||||||
final AggregatorReport report = new AggregatorReport()) {
|
final AggregatorReport report = new AggregatorReport()) {
|
||||||
final Map<String, AtomicLong> fields = new HashMap<>();
|
final Map<String, AtomicLong> fields = new HashMap<>();
|
||||||
final Map<String, AtomicLong> types = new HashMap<>();
|
final Map<String, AtomicLong> types = new HashMap<>();
|
||||||
final Map<String, AtomicLong> collections = new HashMap<>();
|
final Map<String, AtomicLong> collections = new HashMap<>();
|
||||||
|
@ -97,12 +96,12 @@ public class BaseAnalyzerJob {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void analyze(final FileSystem fs,
|
private static void analyze(final FileSystem fs,
|
||||||
final String inputPath,
|
final String inputPath,
|
||||||
final Map<String, AtomicLong> fields,
|
final Map<String, AtomicLong> fields,
|
||||||
final Map<String, AtomicLong> types,
|
final Map<String, AtomicLong> types,
|
||||||
final Map<String, AtomicLong> collections,
|
final Map<String, AtomicLong> collections,
|
||||||
final Map<String, AtomicLong> totals,
|
final Map<String, AtomicLong> totals,
|
||||||
final AggregatorReport report) throws JsonProcessingException, IOException, DocumentException {
|
final AggregatorReport report) throws JsonProcessingException, IOException, DocumentException {
|
||||||
|
|
||||||
final AtomicLong recordsCounter = new AtomicLong(0);
|
final AtomicLong recordsCounter = new AtomicLong(0);
|
||||||
|
|
||||||
|
@ -124,27 +123,28 @@ public class BaseAnalyzerJob {
|
||||||
|
|
||||||
final List<String> recTypes = new ArrayList<>();
|
final List<String> recTypes = new ArrayList<>();
|
||||||
|
|
||||||
for (final Object o : record.selectNodes("//*[local-name()='metadata']//*")) {
|
for (final Object o : record.selectNodes("//*|//@*")) {
|
||||||
|
|
||||||
incrementMapCounter(fields, ((Node) o).getPath());
|
incrementMapCounter(fields, ((Node) o).getPath());
|
||||||
|
|
||||||
final String nodeName = ((Node) o).getName();
|
final String nodeName = ((Node) o).getName();
|
||||||
|
|
||||||
if ("collection".equals(nodeName)) {
|
if (o instanceof Element) {
|
||||||
final Element n = (Element) o;
|
final Element n = (Element) o;
|
||||||
final String collName = n.getText().trim();
|
if ("collection".equals(nodeName)) {
|
||||||
if (StringUtils.isNotBlank(collName)) {
|
final String collName = n.getText().trim();
|
||||||
final Map<String, String> map = new HashMap<>();
|
if (StringUtils.isNotBlank(collName)) {
|
||||||
for (final Object ao : n.attributes()) {
|
final Map<String, String> map = new HashMap<>();
|
||||||
map.put(((Attribute) ao).getName(), ((Attribute) ao).getValue());
|
for (final Object ao : n.attributes()) {
|
||||||
|
map.put(((Attribute) ao).getName(), ((Attribute) ao).getValue());
|
||||||
|
}
|
||||||
|
incrementMapCounter(collections, collName + ": " + OBJECT_MAPPER.writeValueAsString(map));
|
||||||
}
|
}
|
||||||
|
} else if ("type".equals(nodeName)) {
|
||||||
incrementMapCounter(collections, collName + ": " + OBJECT_MAPPER.writeValueAsString(map));
|
recTypes.add("TYPE: " + n.getText().trim());
|
||||||
|
} else if ("typenorm".equals(nodeName)) {
|
||||||
|
recTypes.add("TYPE_NORM: " + n.getText().trim());
|
||||||
}
|
}
|
||||||
} else if ("type".equals(nodeName)) {
|
|
||||||
recTypes.add("TYPE: " + nodeName);
|
|
||||||
} else if ("typenorm".equals(nodeName)) {
|
|
||||||
recTypes.add("TYPE_NORM: " + nodeName);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -163,14 +163,11 @@ public class BaseAnalyzerJob {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void saveReport(final FileSystem fs, final String outputPath, final Map<String, AtomicLong> fields)
|
private static void saveReport(final FileSystem fs, final String outputPath, final Map<String, AtomicLong> fields)
|
||||||
throws JsonProcessingException, IOException {
|
throws JsonProcessingException, IOException {
|
||||||
try (final SequenceFile.Writer writer = SequenceFile
|
try (final SequenceFile.Writer writer = SequenceFile
|
||||||
.createWriter(
|
.createWriter(fs.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
|
||||||
fs.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
|
.keyClass(IntWritable.class), SequenceFile.Writer
|
||||||
.keyClass(IntWritable.class),
|
.valueClass(Text.class), SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
|
||||||
SequenceFile.Writer
|
|
||||||
.valueClass(Text.class),
|
|
||||||
SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
|
|
||||||
|
|
||||||
final Text key = new Text();
|
final Text key = new Text();
|
||||||
final Text value = new Text();
|
final Text value = new Text();
|
||||||
|
|
|
@ -47,7 +47,7 @@ public class BaseCollectorIteratorTest {
|
||||||
|
|
||||||
// System.out.println(record.asXML());
|
// System.out.println(record.asXML());
|
||||||
|
|
||||||
for (final Object o : record.selectNodes("//*[local-name()='metadata']//*")) {
|
for (final Object o : record.selectNodes("//*|//@*")) {
|
||||||
final String path = ((Node) o).getPath();
|
final String path = ((Node) o).getPath();
|
||||||
|
|
||||||
if (fields.containsKey(path)) {
|
if (fields.containsKey(path)) {
|
||||||
|
@ -56,22 +56,23 @@ public class BaseCollectorIteratorTest {
|
||||||
fields.put(path, new AtomicInteger(1));
|
fields.put(path, new AtomicInteger(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ("collection".equals(((Node) o).getName())) {
|
if (o instanceof Element) {
|
||||||
final Element n = (Element) o;
|
final Element n = (Element) o;
|
||||||
final String collName = n.getText().trim();
|
|
||||||
if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
|
if ("collection".equals(n.getName())) {
|
||||||
final Map<String, String> collAttrs = new HashMap<>();
|
final String collName = n.getText().trim();
|
||||||
for (final Object ao : n.attributes()) {
|
if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
|
||||||
collAttrs.put(((Attribute) ao).getName(), ((Attribute) ao).getValue());
|
final Map<String, String> collAttrs = new HashMap<>();
|
||||||
|
for (final Object ao : n.attributes()) {
|
||||||
|
collAttrs.put(((Attribute) ao).getName(), ((Attribute) ao).getValue());
|
||||||
|
}
|
||||||
|
collections.put(collName, collAttrs);
|
||||||
}
|
}
|
||||||
collections.put(collName, collAttrs);
|
} else if ("type".equals(n.getName())) {
|
||||||
|
types.add(n.getText().trim());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if ("type".equals(((Node) o).getName())) {
|
|
||||||
types.add(((Element) o).getText().trim());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue