package eu.dnetlib.dhp.common.report;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.java.PortBindings;
import eu.dnetlib.dhp.common.java.Process;
import eu.dnetlib.dhp.common.java.io.DataStore;
import eu.dnetlib.dhp.common.java.io.FileSystemPath;
import eu.dnetlib.dhp.common.java.porttype.AvroPortType;
import eu.dnetlib.dhp.common.java.porttype.PortType;
import eu.dnetlib.dhp.common.schemas.ReportEntry;
/**
 * Java workflow node process for building a report.
 * It writes report properties into an avro datastore of {@link ReportEntry}s
 * with the location specified in the output port.
 * A report property name must start with {@code report.} to be included
 * in the output datastore.
 * <p>
 * Usage example:
 * <pre>
 * {@code
 * ...
 * <arg>eu.dnetlib.dhp.common.java.ProcessWrapper</arg>
 * <arg>eu.dnetlib.dhp.common.report.ReportGenerator</arg>
 * <arg>-Preport.someProperty=someValue</arg>
 * <arg>-Oreport=/report/path</arg>
 * ...
 * }
 * </pre>
 * Above example will produce an avro datastore in {@code /report/path}
 * with a single {@link ReportEntry},
 * where the {@link ReportEntry#getKey()} will be equal to {@code someProperty} and
 * the {@link ReportEntry#getValue()} will be equal to {@code someValue}
 * (notice the stripped {@code report.} prefix from the entry key).
 *
 * @author madryk
 */
public class ReportGenerator implements Process {
/** Name of the output port that receives the location of the generated report datastore. */
private static final String REPORT_PORT_OUT_NAME = "report";
/** Prefix a property name must carry to be included in the report; per the class Javadoc it is stripped from the resulting {@link ReportEntry} key. */
private static final String REPORT_PROPERTY_PREFIX = "report.";
//------------------------ LOGIC --------------------------
@Override
public Map