package eu.dnetlib.dhp.common.report; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.java.PortBindings; import eu.dnetlib.dhp.common.java.Process; import eu.dnetlib.dhp.common.java.io.DataStore; import eu.dnetlib.dhp.common.java.io.FileSystemPath; import eu.dnetlib.dhp.common.java.porttype.AvroPortType; import eu.dnetlib.dhp.common.java.porttype.PortType; import eu.dnetlib.dhp.common.schemas.ReportEntry; /** * Java workflow node process for building report.
* It writes report properties into avro datastore of {@link ReportEntry}s * with location specified in output port.
* Report property name must start with report. to * be included in output datastore. * * Usage example:
*
 * {@code
 * 
 *     
 *         eu.dnetlib.dhp.common.java.ProcessWrapper
 *         eu.dnetlib.dhp.common.report.ReportGenerator
 *         -Preport.someProperty=someValue
 *         -Oreport=/report/path
 *     
 *     ...
 * 
 * }
 * 
* Above example will produce avro datastore in /report/path * with single {@link ReportEntry}. * Where the {@link ReportEntry#getKey()} will be equal to someProperty and * the {@link ReportEntry#getValue()} will be equal to someValue * (notice the stripped report. prefix from the entry key). * * * @author madryk * */ public class ReportGenerator implements Process { private static final String REPORT_PORT_OUT_NAME = "report"; private static final String REPORT_PROPERTY_PREFIX = "report."; //------------------------ LOGIC -------------------------- @Override public Map getInputPorts() { return Collections.emptyMap(); } @Override public Map getOutputPorts() { return Collections.singletonMap(REPORT_PORT_OUT_NAME, new AvroPortType(ReportEntry.SCHEMA$)); } @Override public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { Map entriesToReport = collectEntriesToReport(parameters); List avroReport = convertToAvroReport(entriesToReport); FileSystem fs = FileSystem.get(conf); Path reportPath = portBindings.getOutput().get(REPORT_PORT_OUT_NAME); DataStore.create(avroReport, new FileSystemPath(fs, reportPath)); } //------------------------ PRIVATE -------------------------- private Map collectEntriesToReport(Map parameters) { return parameters.entrySet().stream() .filter(property -> property.getKey().startsWith(REPORT_PROPERTY_PREFIX)) .map(x -> Pair.of(x.getKey().substring(REPORT_PROPERTY_PREFIX.length()), x.getValue())) .collect(Collectors.toMap(e -> e.getLeft(), e -> e.getRight())); } private List convertToAvroReport(Map entriesToReport) { List avroReport = Lists.newArrayList(); entriesToReport.forEach((key, value) -> avroReport.add(ReportEntryFactory.createCounterReportEntry(key, Long.valueOf(value)))); return avroReport; } }