56 lines
1.9 KiB
Java
56 lines
1.9 KiB
Java
|
|
package eu.dnetlib.dhp.collection.worker;
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
import eu.dnetlib.collector.worker.model.ApiDescriptor;
|
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
|
|
|
|
/**
|
|
* DnetCollectortWorkerApplication is the main class responsible to start the Dnet Collection into HDFS. This module
|
|
* will be executed on the hadoop cluster and taking in input some parameters that tells it which is the right collector
|
|
* plugin to use and where store the data into HDFS path
|
|
*
|
|
* @author Sandro La Bruzzo
|
|
*/
|
|
public class CollectorWorkerApplication {
|
|
|
|
private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class);
|
|
|
|
private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
|
|
|
|
/**
|
|
* @param args
|
|
*/
|
|
public static void main(final String[] args) throws Exception {
|
|
|
|
final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
|
|
IOUtils
|
|
.toString(
|
|
CollectorWorker.class
|
|
.getResourceAsStream(
|
|
"/eu/dnetlib/dhp/collection/collector_parameter.json")));
|
|
argumentParser.parseArgument(args);
|
|
|
|
final String hdfsuri = argumentParser.get("namenode");
|
|
|
|
log.info("hdfsURI is {}", hdfsuri);
|
|
final String hdfsPath = argumentParser.get("hdfsPath");
|
|
log.info("hdfsPath is {}" + hdfsPath);
|
|
final String apiDescriptor = argumentParser.get("apidescriptor");
|
|
log.info("apiDescriptor is {}" + apiDescriptor);
|
|
|
|
final ObjectMapper jsonMapper = new ObjectMapper();
|
|
|
|
final ApiDescriptor api = jsonMapper.readValue(apiDescriptor, ApiDescriptor.class);
|
|
|
|
final CollectorWorker worker = new CollectorWorker(collectorPluginFactory, api, hdfsuri, hdfsPath);
|
|
worker.collect();
|
|
}
|
|
}
|