forked from antonis.lempesis/dnet-hadoop

Removed Spring Boot dependencies; they are not needed.

parent 53ec9bccca
commit 5b48bb9be1
@@ -1,16 +1,30 @@
 Description of the Module
 --------------------------
-This module defines a **collector worker application** that run on Hadoop.
+This module defines a **collector worker application** that runs on Hadoop.
 
-It is responsible of harvesting metadata using different plugin, that should be passed as arguments
-in the main class
+It is responsible for harvesting metadata using a plugin, which is
+selected via the arguments passed to the main class.
+
+The collector worker reports the progress of the harvesting action by
+sending **ONGOING** messages on a message queue; furthermore, at the end
+of the job it reports the status of the collection, i.e. the number of
+records collected, by sending **REPORT** messages.
+
+To work, the collector worker needs the following parameters:
+
+* **hdfsPath**: the path where the sequence file is stored
+* **apidescriptor**: the JSON encoding of the API Descriptor
+* **namenode**: the Name Node URI
+* **userHDFS**: the user who creates the HDFS sequence file
+* **rabbitUser**: the user to connect to RabbitMQ for messaging
+* **rabbitPassWord**: the password to connect to RabbitMQ for messaging
+* **rabbitHost**: the host of the RabbitMQ server
+* **rabbitOngoingQueue**: the name of the ongoing queue
+* **rabbitReportQueue**: the name of the report queue
+* **workflowId**: the identifier of the dnet Workflow
 
 ## Plugins
 * OAI Plugin
 
-
-## Api Descriptor
-TODO
-
 ## Usage
 TODO
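The `## Api Descriptor` section above was dropped from the README while still marked TODO, but the descriptor's shape can be read off the tests later in this commit. A minimal sketch, assuming `ApiDescriptor` exposes the setters used in `DnetCollectorWorkerApplicationTests` (the base URL is the one appearing in those tests):

    import com.fasterxml.jackson.databind.ObjectMapper;
    import eu.dnetlib.collector.worker.model.ApiDescriptor;

    public class ApiDescriptorExample {
        public static void main(final String[] args) throws Exception {
            // Build a descriptor the same way the tests in this commit do.
            final ApiDescriptor api = new ApiDescriptor();
            api.setId("oai");
            api.setProtocol("oai"); // selects the collector plugin
            api.setBaseUrl("http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai");
            api.getParams().put("format", "oai_dc"); // OAI metadata format
            // The printed JSON is the value expected by the apidescriptor parameter.
            System.out.println(new ObjectMapper().writeValueAsString(api));
        }
    }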
@@ -4,12 +4,13 @@
 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<!-- Inherit defaults from Spring Boot -->
 	<parent>
-		<groupId>org.springframework.boot</groupId>
-		<artifactId>spring-boot-starter-parent</artifactId>
-		<version>2.1.3.RELEASE</version>
+		<groupId>eu.dnetlib.dhp</groupId>
+		<artifactId>dhp-applications</artifactId>
+		<version>1.0.0-SNAPSHOT</version>
 		<relativePath>../</relativePath>
 	</parent>
 
 	<modelVersion>4.0.0</modelVersion>
 
 	<groupId>eu.dnetlib</groupId>

@@ -18,59 +19,8 @@
 
-	<repositories>
-		<repository>
-			<id>dnet45-releases</id>
-			<name>D-Net 45 releases</name>
-			<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
-			<layout>default</layout>
-			<snapshots>
-				<enabled>false</enabled>
-			</snapshots>
-			<releases>
-				<enabled>true</enabled>
-			</releases>
-		</repository>
-		<repository>
-			<id>dnet45-bootstrap-release</id>
-			<name>dnet45 bootstrap release</name>
-			<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-bootstrap-release</url>
-			<layout>default</layout>
-			<snapshots>
-				<enabled>false</enabled>
-			</snapshots>
-			<releases>
-				<enabled>true</enabled>
-			</releases>
-		</repository>
-		<repository>
-			<id>cloudera</id>
-			<name>Cloudera Repository</name>
-			<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
-			<releases>
-				<enabled>true</enabled>
-			</releases>
-			<snapshots>
-				<enabled>false</enabled>
-			</snapshots>
-		</repository>
-	</repositories>
 
 	<build>
 		<plugins>
-			<plugin>
-				<groupId>org.springframework.boot</groupId>
-				<artifactId>spring-boot-maven-plugin</artifactId>
-				<configuration>
-					<executable>true</executable>
-				</configuration>
-			</plugin>
-
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-compiler-plugin</artifactId>

@@ -129,12 +79,10 @@
 	</build>
 
 	<dependencies>
 		<dependency>
 			<groupId>eu.dnetlib.dhp</groupId>
 			<artifactId>dhp-common</artifactId>
-			<version>1.0.0-SNAPSHOT</version>
 		</dependency>
 
 		<dependency>

@@ -142,10 +90,6 @@
 			<artifactId>commons-cli</artifactId>
 			<version>1.4</version>
 		</dependency>
-		<dependency>
-			<groupId>org.springframework.boot</groupId>
-			<artifactId>spring-boot-starter</artifactId>
-		</dependency>
 		<dependency>
 			<groupId>org.apache.hadoop</groupId>
 			<artifactId>hadoop-client</artifactId>

@@ -154,26 +98,39 @@
 		<dependency>
 			<groupId>com.fasterxml.jackson.core</groupId>
 			<artifactId>jackson-core</artifactId>
+			<version>2.9.8</version>
 		</dependency>
 		<dependency>
 			<groupId>com.fasterxml.jackson.core</groupId>
 			<artifactId>jackson-annotations</artifactId>
+			<version>2.9.0</version>
 		</dependency>
 		<dependency>
 			<groupId>com.fasterxml.jackson.core</groupId>
 			<artifactId>jackson-databind</artifactId>
-		</dependency>
-		<dependency>
-			<groupId>dom4j</groupId>
-			<artifactId>dom4j</artifactId>
+			<version>2.9.8</version>
 		</dependency>
 		<dependency>
 			<groupId>jaxen</groupId>
 			<artifactId>jaxen</artifactId>
+			<version>1.1.6</version>
 		</dependency>
 		<dependency>
-			<groupId>org.springframework.boot</groupId>
-			<artifactId>spring-boot-starter-test</artifactId>
+			<groupId>dom4j</groupId>
+			<artifactId>dom4j</artifactId>
+			<version>1.6.1</version>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>com.fasterxml.jackson.core</groupId>
+			<artifactId>jackson-databind</artifactId>
+			<version>2.9.8</version>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.mockito</groupId>
+			<artifactId>mockito-core</artifactId>
+			<version>3.0.0</version>
 			<scope>test</scope>
 		</dependency>
 
@@ -0,0 +1,111 @@
+package eu.dnetlib.collector.worker;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.collector.worker.model.ApiDescriptor;
+import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
+import eu.dnetlib.collector.worker.utils.CollectorPluginFactory;
+import eu.dnetlib.message.Message;
+import eu.dnetlib.message.MessageManager;
+import eu.dnetlib.message.MessageType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+
+public class DnetCollectorWorker {
+
+    private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class);
+
+    private final CollectorPluginFactory collectorPluginFactory;
+    private final DnetCollectorWorkerArgumentParser argumentParser;
+    private final MessageManager manager;
+
+    public DnetCollectorWorker(final CollectorPluginFactory collectorPluginFactory, final DnetCollectorWorkerArgumentParser argumentParser, final MessageManager manager) throws DnetCollectorException {
+        this.collectorPluginFactory = collectorPluginFactory;
+        this.argumentParser = argumentParser;
+        this.manager = manager;
+    }
+
+    public void collect() throws DnetCollectorException {
+        try {
+            final ObjectMapper jsonMapper = new ObjectMapper();
+            final ApiDescriptor api = jsonMapper.readValue(argumentParser.getJson(), ApiDescriptor.class);
+
+            final CollectorPlugin plugin = collectorPluginFactory.getPluginByProtocol(api.getProtocol());
+
+            final String hdfsuri = argumentParser.getNameNode();
+
+            // ====== Init HDFS File System Object
+            Configuration conf = new Configuration();
+            // Set FileSystem URI
+            conf.set("fs.defaultFS", hdfsuri);
+            // Because of Maven
+            conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
+            conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
+
+            System.setProperty("HADOOP_USER_NAME", argumentParser.getUser());
+            System.setProperty("hadoop.home.dir", "/");
+            // Get the filesystem - HDFS
+            FileSystem fs = FileSystem.get(URI.create(hdfsuri), conf);
+            Path hdfswritepath = new Path(argumentParser.getHdfsPath());
+
+            log.info("Created path " + hdfswritepath.toString());
+
+            final Map<String, String> ongoingMap = new HashMap<>();
+            final Map<String, String> reportMap = new HashMap<>();
+            final AtomicInteger counter = new AtomicInteger(0);
+            try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+                    SequenceFile.Writer.file(hdfswritepath), SequenceFile.Writer.keyClass(IntWritable.class),
+                    SequenceFile.Writer.valueClass(Text.class))) {
+                final IntWritable key = new IntWritable(counter.get());
+                final Text value = new Text();
+                plugin.collect(api).forEach(content -> {
+                    key.set(counter.getAndIncrement());
+                    value.set(content);
+                    if (counter.get() % 10 == 0) {
+                        try {
+                            ongoingMap.put("ongoing", "" + counter.get());
+                            log.debug("Sending message: " + manager.sendMessage(new Message(argumentParser.getWorkflowId(), "Collection", MessageType.ONGOING, ongoingMap), argumentParser.getRabbitOngoingQueue(), true, false));
+                        } catch (Exception e) {
+                            log.error("Error on sending message ", e);
+                        }
+                    }
+                    try {
+                        writer.append(key, value);
+                    } catch (IOException e) {
+                        throw new RuntimeException(e);
+                    }
+                });
+            }
+            ongoingMap.put("ongoing", "" + counter.get());
+            manager.sendMessage(new Message(argumentParser.getWorkflowId(), "Collection", MessageType.ONGOING, ongoingMap), argumentParser.getRabbitOngoingQueue(), true, false);
+            reportMap.put("collected", "" + counter.get());
+            manager.sendMessage(new Message(argumentParser.getWorkflowId(), "Collection", MessageType.REPORT, reportMap), argumentParser.getRabbitReportQueue(), true, false);
+            manager.close();
+        } catch (Throwable e) {
+            throw new DnetCollectorException("Error on collecting ", e);
+        }
+    }
+}
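A note on the plugin contract: the worker consumes `plugin.collect(api)` with `forEach`, so `CollectorPlugin.collect` evidently returns a `Stream<String>` of records. A minimal sketch of a hypothetical plugin, assuming that signature (the "echo" behaviour and record values are illustrative only, not part of this commit):

    package eu.dnetlib.collector.worker.plugins;

    import java.util.stream.Stream;

    import eu.dnetlib.collector.worker.model.ApiDescriptor;

    // Hypothetical plugin that emits a fixed pair of records instead of
    // calling a remote API; assumes the interface declares
    // Stream<String> collect(ApiDescriptor api).
    public class EchoCollectorPlugin implements CollectorPlugin {

        @Override
        public Stream<String> collect(final ApiDescriptor api) {
            // A real plugin would use api.getBaseUrl() and api.getParams()
            // to fetch records, as OaiCollectorPlugin does below.
            return Stream.of("<record>1</record>", "<record>2</record>");
        }
    }

To be usable, such a plugin would also need its own case in CollectorPluginFactory, since plugin lookup in this commit becomes an explicit switch rather than Spring classpath scanning.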
@@ -1,237 +1,43 @@
 package eu.dnetlib.collector.worker;
 
-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.collector.worker.model.ApiDescriptor;
-import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
-import eu.dnetlib.collector.worker.utils.CollectorPluginEnumerator;
-import eu.dnetlib.message.Message;
+import eu.dnetlib.collector.worker.utils.CollectorPluginFactory;
 import eu.dnetlib.message.MessageManager;
-import eu.dnetlib.message.MessageType;
-import org.apache.commons.cli.*;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.CommandLineRunner;
-import org.springframework.boot.SpringApplication;
-import org.springframework.boot.autoconfigure.SpringBootApplication;
-
-import java.io.IOException;
-import java.net.URI;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
 
 /**
- *
  * DnetCollectorWorkerApplication is the main class responsible for starting
  * the Dnet collection into HDFS.
  * This module is executed on the Hadoop cluster, taking in input the parameters
  * that tell it which collector plugin to use and where in HDFS to store the data.
  *
- *
  * @author Sandro La Bruzzo
  */
-@SpringBootApplication
-public class DnetCollectorWorkerApplication implements CommandLineRunner {
+public class DnetCollectorWorkerApplication {
 
 	private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
 
-	@Autowired
-	private CollectorPluginEnumerator collectorPluginEnumerator;
+	private static CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
+	private static DnetCollectorWorkerArgumentParser argumentParser = new DnetCollectorWorkerArgumentParser();
 
 	/**
-	 *
 	 * @param args
 	 */
-	public static void main(final String[] args) {
-		SpringApplication.run(DnetCollectorWorkerApplication.class, args);
+	public static void main(final String[] args) throws Exception {
+		argumentParser.parseArgument(args);
+		log.info("hdfsPath = " + argumentParser.getHdfsPath());
+		log.info("json = " + argumentParser.getJson());
+		final MessageManager manager = new MessageManager(argumentParser.getRabbitHost(), argumentParser.getRabbitUser(), argumentParser.getRabbitPassword(), false, false, null);
+		final DnetCollectorWorker worker = new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager);
+		worker.collect();
 	}
-
-	/**
-	 * This module expect two arguments:
-	 * path hdfs where store the sequential file.
-	 * Json serialization of {@link ApiDescriptor}
-	 */
-	@Override
-	public void run(final String... args) throws Exception {
-		Options options = new Options();
-		options.addOption(Option.builder("p")
-				.longOpt("hdfsPath")
-				.required(true)
-				.desc("the path where storing the sequential file")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("a")
-				.longOpt("apidescriptor")
-				.required(true)
-				.desc("the Json enconding of the API Descriptor")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("n")
-				.longOpt("namenode")
-				.required(true)
-				.desc("the Name Node URI")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("u")
-				.longOpt("userHDFS")
-				.required(true)
-				.desc("the user wich create the hdfs seq file")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("ru")
-				.longOpt("rabbitUser")
-				.required(true)
-				.desc("the user to connect with RabbitMq for messaging")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("rp")
-				.longOpt("rabbitPassWord")
-				.required(true)
-				.desc("the password to connect with RabbitMq for messaging")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("rh")
-				.longOpt("rabbitHost")
-				.required(true)
-				.desc("the host of the RabbitMq server")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("ro")
-				.longOpt("rabbitOngoingQueue")
-				.required(true)
-				.desc("the name of the ongoing queue")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("rr")
-				.longOpt("rabbitReportQueue")
-				.required(true)
-				.desc("the name of the report queue")
-				.hasArg() // This option has an argument.
-				.build());
-		options.addOption(Option.builder("w")
-				.longOpt("workflowId")
-				.required(true)
-				.desc("the identifier of the dnet Workflow")
-				.hasArg() // This option has an argument.
-				.build());
-
-		CommandLineParser parser = new DefaultParser();
-		String hdfsPath;
-		String json;
-		String nameNode;
-		String user;
-		String rabbitUser;
-		String rabbitPassword;
-		String rabbitHost;
-		String rabbitOngoingQueue;
-		String rabbitReportQueue;
-		String workflowId;
-
-		try {
-			CommandLine cmd = parser.parse(options, args);
-			hdfsPath = cmd.getOptionValue("p");
-			json = cmd.getOptionValue("a");
-			nameNode = cmd.getOptionValue("n");
-			user = cmd.getOptionValue("u");
-			rabbitUser = cmd.getOptionValue("ru");
-			rabbitPassword = cmd.getOptionValue("rp");
-			rabbitHost = cmd.getOptionValue("rh");
-			rabbitOngoingQueue = cmd.getOptionValue("ro");
-			rabbitReportQueue = cmd.getOptionValue("rr");
-			workflowId = cmd.getOptionValue("w");
-		} catch (ParseException e) {
-			System.out.println("Error on executing collector worker, missing parameter:");
-			e.printStackTrace();
-			HelpFormatter formatter = new HelpFormatter();
-			formatter.printHelp("dhp-collector-worker", options);
-			return;
-		}
-		log.info("hdfsPath = " + hdfsPath);
-		log.info("json = " + json);
-
-		final MessageManager manager = new MessageManager(rabbitHost, rabbitUser, rabbitPassword, false, false, null);
-
-		final ObjectMapper jsonMapper = new ObjectMapper();
-		final ApiDescriptor api = jsonMapper.readValue(json, ApiDescriptor.class);
-
-		final CollectorPlugin plugin = collectorPluginEnumerator.getPluginByProtocol(api.getProtocol());
-
-		final String hdfsuri = nameNode;
-
-		// ====== Init HDFS File System Object
-		Configuration conf = new Configuration();
-		// Set FileSystem URI
-		conf.set("fs.defaultFS", hdfsuri);
-		// Because of Maven
-		conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
-		conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
-
-		System.setProperty("HADOOP_USER_NAME", user);
-		System.setProperty("hadoop.home.dir", "/");
-		// Get the filesystem - HDFS
-		FileSystem fs = FileSystem.get(URI.create(hdfsuri), conf);
-		Path hdfswritepath = new Path(hdfsPath);
-
-		log.info("Created path " + hdfswritepath.toString());
-
-		final Map<String, String> ongoingMap = new HashMap<>();
-		final Map<String, String> reportMap = new HashMap<>();
-		final AtomicInteger counter = new AtomicInteger(0);
-		try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
-				SequenceFile.Writer.file(hdfswritepath), SequenceFile.Writer.keyClass(IntWritable.class),
-				SequenceFile.Writer.valueClass(Text.class))) {
-			final IntWritable key = new IntWritable(counter.get());
-			final Text value = new Text();
-			plugin.collect(api).forEach(content -> {
-				key.set(counter.getAndIncrement());
-				value.set(content);
-				if (counter.get() % 10 == 0) {
-					try {
-						ongoingMap.put("ongoing", "" + counter.get());
-						manager.sendMessage(new Message(workflowId, "Collection", MessageType.ONGOING, ongoingMap), rabbitOngoingQueue, true, false);
-					} catch (Exception e) {
-						log.error("Error on sending message ", e);
-					}
-				}
-				try {
-					writer.append(key, value);
-				} catch (IOException e) {
-					throw new RuntimeException(e);
-				}
-			});
-		}
-		ongoingMap.put("ongoing", "" + counter.get());
-		manager.sendMessage(new Message(workflowId, "Collection", MessageType.ONGOING, ongoingMap), rabbitOngoingQueue, true, false);
-		reportMap.put("collected", "" + counter.get());
-		manager.sendMessage(new Message(workflowId, "Collection", MessageType.REPORT, reportMap), rabbitReportQueue, true, false);
-		manager.close();
-	}
 }
@@ -0,0 +1,193 @@
+package eu.dnetlib.collector.worker;
+
+import org.apache.commons.cli.*;
+
+public class DnetCollectorWorkerArgumentParser {
+
+	private final Options options;
+	private String hdfsPath;
+	private String json;
+	private String nameNode;
+	private String user;
+	private String rabbitUser;
+	private String rabbitPassword;
+	private String rabbitHost;
+	private String rabbitOngoingQueue;
+	private String rabbitReportQueue;
+	private String workflowId;
+
+	public DnetCollectorWorkerArgumentParser() {
+		options = new Options();
+		options.addOption(Option.builder("p")
+				.longOpt("hdfsPath")
+				.required(true)
+				.desc("the path where the sequence file is stored")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("a")
+				.longOpt("apidescriptor")
+				.required(true)
+				.desc("the JSON encoding of the API Descriptor")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("n")
+				.longOpt("namenode")
+				.required(true)
+				.desc("the Name Node URI")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("u")
+				.longOpt("userHDFS")
+				.required(true)
+				.desc("the user who creates the HDFS sequence file")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("ru")
+				.longOpt("rabbitUser")
+				.required(true)
+				.desc("the user to connect to RabbitMQ for messaging")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("rp")
+				.longOpt("rabbitPassWord")
+				.required(true)
+				.desc("the password to connect to RabbitMQ for messaging")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("rh")
+				.longOpt("rabbitHost")
+				.required(true)
+				.desc("the host of the RabbitMQ server")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("ro")
+				.longOpt("rabbitOngoingQueue")
+				.required(true)
+				.desc("the name of the ongoing queue")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("rr")
+				.longOpt("rabbitReportQueue")
+				.required(true)
+				.desc("the name of the report queue")
+				.hasArg() // This option has an argument.
+				.build());
+		options.addOption(Option.builder("w")
+				.longOpt("workflowId")
+				.required(true)
+				.desc("the identifier of the dnet Workflow")
+				.hasArg() // This option has an argument.
+				.build());
+	}
+
+	public void parseArgument(final String[] args) throws DnetCollectorException {
+		try {
+			CommandLineParser parser = new DefaultParser();
+			CommandLine cmd = parser.parse(options, args);
+			hdfsPath = cmd.getOptionValue("p");
+			json = cmd.getOptionValue("a");
+			nameNode = cmd.getOptionValue("n");
+			user = cmd.getOptionValue("u");
+			rabbitUser = cmd.getOptionValue("ru");
+			rabbitPassword = cmd.getOptionValue("rp");
+			rabbitHost = cmd.getOptionValue("rh");
+			rabbitOngoingQueue = cmd.getOptionValue("ro");
+			rabbitReportQueue = cmd.getOptionValue("rr");
+			workflowId = cmd.getOptionValue("w");
+		} catch (Throwable e) {
+			throw new DnetCollectorException("Error during parsing arguments ", e);
+		}
+	}
+
+	public Options getOptions() {
+		return options;
+	}
+
+	public String getHdfsPath() {
+		return hdfsPath;
+	}
+
+	public void setHdfsPath(String hdfsPath) {
+		this.hdfsPath = hdfsPath;
+	}
+
+	public String getJson() {
+		return json;
+	}
+
+	public void setJson(String json) {
+		this.json = json;
+	}
+
+	public String getNameNode() {
+		return nameNode;
+	}
+
+	public void setNameNode(String nameNode) {
+		this.nameNode = nameNode;
+	}
+
+	public String getUser() {
+		return user;
+	}
+
+	public void setUser(String user) {
+		this.user = user;
+	}
+
+	public String getRabbitUser() {
+		return rabbitUser;
+	}
+
+	public void setRabbitUser(String rabbitUser) {
+		this.rabbitUser = rabbitUser;
+	}
+
+	public String getRabbitPassword() {
+		return rabbitPassword;
+	}
+
+	public void setRabbitPassword(String rabbitPassword) {
+		this.rabbitPassword = rabbitPassword;
+	}
+
+	public String getRabbitHost() {
+		return rabbitHost;
+	}
+
+	public void setRabbitHost(String rabbitHost) {
+		this.rabbitHost = rabbitHost;
+	}
+
+	public String getRabbitOngoingQueue() {
+		return rabbitOngoingQueue;
+	}
+
+	public void setRabbitOngoingQueue(String rabbitOngoingQueue) {
+		this.rabbitOngoingQueue = rabbitOngoingQueue;
+	}
+
+	public String getRabbitReportQueue() {
+		return rabbitReportQueue;
+	}
+
+	public void setRabbitReportQueue(String rabbitReportQueue) {
+		this.rabbitReportQueue = rabbitReportQueue;
+	}
+
+	public String getWorkflowId() {
+		return workflowId;
+	}
+
+	public void setWorkflowId(String workflowId) {
+		this.workflowId = workflowId;
+	}
+}
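Taken together with DnetCollectorWorkerApplication above, launching the worker boils down to supplying one value per required option. A minimal sketch of driving the parser directly (every value below is hypothetical, chosen only for illustration):

    package eu.dnetlib.collector.worker;

    public class ArgumentParserExample {
        public static void main(final String[] args) throws Exception {
            final DnetCollectorWorkerArgumentParser parser = new DnetCollectorWorkerArgumentParser();
            // All ten options are required; parseArgument throws
            // DnetCollectorException if any of them is missing.
            parser.parseArgument(new String[] {
                    "-p", "/tmp/collection.seq",                   // hdfsPath (hypothetical)
                    "-a", "{\"id\":\"oai\",\"protocol\":\"oai\"}", // apidescriptor JSON (abridged example)
                    "-n", "hdfs://namenode:8020",                  // namenode (hypothetical)
                    "-u", "hadoop",                                // userHDFS
                    "-ru", "guest",                                // rabbitUser
                    "-rp", "guest",                                // rabbitPassWord
                    "-rh", "localhost",                            // rabbitHost
                    "-ro", "ongoing",                              // rabbitOngoingQueue
                    "-rr", "report",                               // rabbitReportQueue
                    "-w", "wf-1234"                                // workflowId
            });
            System.out.println(parser.getHdfsPath()); // prints /tmp/collection.seq
        }
    }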
@@ -7,10 +7,6 @@ import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Component;
-
 import com.google.common.base.Splitter;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;

@@ -18,10 +14,8 @@ import com.google.common.collect.Lists;
 import eu.dnetlib.collector.worker.DnetCollectorException;
 import eu.dnetlib.collector.worker.model.ApiDescriptor;
 import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
-import eu.dnetlib.collector.worker.utils.DnetWorkerCollector;
 
-@Component
-@DnetWorkerCollector("oai")
 public class OaiCollectorPlugin implements CollectorPlugin {
 
 	private static final String FORMAT_PARAM = "format";

@@ -29,7 +23,7 @@ public class OaiCollectorPlugin implements CollectorPlugin {
 	private static final Object OAI_FROM_DATE_PARAM = "fromDate";
 	private static final Object OAI_UNTIL_DATE_PARAM = "untilDate";
 
-	@Autowired
 	private OaiIteratorFactory oaiIteratorFactory;
 
 	@Override

@@ -58,9 +52,16 @@ public class OaiCollectorPlugin implements CollectorPlugin {
 		if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate); }
 
 		final Iterator<Iterator<String>> iters = sets.stream()
-				.map(set -> oaiIteratorFactory.newIterator(baseUrl, mdFormat, set, fromDate, untilDate))
+				.map(set -> getOaiIteratorFactory().newIterator(baseUrl, mdFormat, set, fromDate, untilDate))
 				.iterator();
 
 		return StreamSupport.stream(Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED), false);
 	}
+
+	public OaiIteratorFactory getOaiIteratorFactory() {
+		if (oaiIteratorFactory == null) {
+			oaiIteratorFactory = new OaiIteratorFactory();
+		}
+		return oaiIteratorFactory;
+	}
 }
@@ -1,20 +1,24 @@
 package eu.dnetlib.collector.worker.plugins.oai;
 
 import java.util.Iterator;
 
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Component;
-
 import eu.dnetlib.collector.worker.utils.HttpConnector;
 
-@Component
 public class OaiIteratorFactory {
 
-	@Autowired
 	private HttpConnector httpConnector;
 
 	public Iterator<String> newIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate) {
-		return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, httpConnector);
+		return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector());
 	}
+
+	private HttpConnector getHttpConnector() {
+		if (httpConnector == null)
+			httpConnector = new HttpConnector();
+		return httpConnector;
+	}
 }
@@ -1,24 +0,0 @@
-package eu.dnetlib.collector.worker.utils;
-
-import java.util.List;
-
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Component;
-
-import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
-
-@Component
-public class CollectorPluginEnumerator {
-
-	@Autowired
-	private List<CollectorPlugin> plugins;
-
-	public CollectorPlugin getPluginByProtocol(final String protocol) {
-		return plugins.stream()
-				.filter(p -> p.getClass().isAnnotationPresent(DnetWorkerCollector.class))
-				.filter(p -> p.getClass().getAnnotation(DnetWorkerCollector.class).value().equalsIgnoreCase(protocol))
-				.findFirst()
-				.get();
-	}
-}
@@ -0,0 +1,20 @@
+package eu.dnetlib.collector.worker.utils;
+
+import eu.dnetlib.collector.worker.DnetCollectorException;
+import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
+import eu.dnetlib.collector.worker.plugins.oai.OaiCollectorPlugin;
+
+public class CollectorPluginFactory {
+
+	public CollectorPlugin getPluginByProtocol(final String protocol) throws DnetCollectorException {
+		if (protocol == null) throw new DnetCollectorException("protocol cannot be null");
+		switch (protocol.toLowerCase().trim()) {
+			case "oai":
+				return new OaiCollectorPlugin();
+			default:
+				throw new DnetCollectorException("Unknown protocol");
+		}
+	}
+}
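This factory replaces the Spring-based CollectorPluginEnumerator deleted above: plugin lookup is now an explicit switch on the protocol name instead of classpath scanning for @DnetWorkerCollector annotations (whose definition is removed below). A minimal usage sketch, mirroring what the updated tests do:

    import eu.dnetlib.collector.worker.DnetCollectorException;
    import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
    import eu.dnetlib.collector.worker.utils.CollectorPluginFactory;

    public class PluginLookupExample {
        public static void main(final String[] args) throws DnetCollectorException {
            // Matching is case-insensitive: "oai" and "OAI" resolve to the same plugin.
            final CollectorPlugin plugin = new CollectorPluginFactory().getPluginByProtocol("OAI");
            System.out.println(plugin.getClass().getSimpleName()); // OaiCollectorPlugin
        }
    }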
@@ -1,14 +0,0 @@
-package eu.dnetlib.collector.worker.utils;
-
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-@Retention(RetentionPolicy.RUNTIME)
-@Target(ElementType.TYPE)
-public @interface DnetWorkerCollector {
-
-	String value();
-
-}
@@ -21,11 +21,11 @@ import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.math.NumberUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.springframework.stereotype.Component;
 
 import eu.dnetlib.collector.worker.DnetCollectorException;
 
-@Component
 public class HttpConnector {
 
 	private static final Log log = LogFactory.getLog(HttpConnector.class);

@@ -48,7 +48,7 @@ public class HttpConnector {
 	 * @param requestUrl
 	 *            the URL
 	 * @return the content of the downloaded resource
-	 * @throws CollectorServiceException
+	 * @throws DnetCollectorException
 	 *             when retrying more than maxNumberOfRetry times
 	 */
 	public String getInputSource(final String requestUrl) throws DnetCollectorException {

@@ -61,7 +61,7 @@ public class HttpConnector {
 	 * @param requestUrl
 	 *            the URL
 	 * @return the content of the downloaded resource as InputStream
-	 * @throws CollectorServiceException
+	 * @throws DnetCollectorException
 	 *             when retrying more than maxNumberOfRetry times
 	 */
 	public InputStream getInputSourceAsStream(final String requestUrl) throws DnetCollectorException {
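With the @Component annotation gone, HttpConnector is now instantiated directly wherever it is needed (see the lazy getter in OaiIteratorFactory above). A minimal sketch of fetching a single OAI-PMH response, assuming only the two public methods shown in this hunk (the endpoint URL is hypothetical):

    import eu.dnetlib.collector.worker.DnetCollectorException;
    import eu.dnetlib.collector.worker.utils.HttpConnector;

    public class HttpConnectorExample {
        public static void main(final String[] args) throws DnetCollectorException {
            final HttpConnector connector = new HttpConnector();
            // getInputSource retries up to maxNumberOfRetry times before
            // giving up with a DnetCollectorException.
            final String xml = connector.getInputSource("http://example.org/oai?verb=Identify");
            System.out.println(xml);
        }
    }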
@@ -1,34 +1,65 @@
 package eu.dnetlib.collector.worker;
 
-import static org.junit.Assert.assertNotNull;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.CommandLineRunner;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.context.ApplicationContext;
-import org.springframework.test.context.junit4.SpringRunner;
-
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 import eu.dnetlib.collector.worker.model.ApiDescriptor;
-import eu.dnetlib.collector.worker.utils.CollectorPluginEnumerator;
+import eu.dnetlib.collector.worker.utils.CollectorPluginFactory;
+import eu.dnetlib.message.Message;
+import eu.dnetlib.message.MessageManager;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+
+import static org.junit.Assert.assertNotNull;
+import static org.mockito.Mockito.*;
 
-@RunWith(SpringRunner.class)
-@SpringBootTest
 public class DnetCollectorWorkerApplicationTests {
 
-	@Autowired
-	private ApplicationContext ctx;
+	private DnetCollectorWorkerArgumentParser argumentParser = mock(DnetCollectorWorkerArgumentParser.class);
+	private MessageManager messageManager = mock(MessageManager.class);
+
+	private DnetCollectorWorker worker;
+
+	@Before
+	public void setup() throws Exception {
+		ObjectMapper mapper = new ObjectMapper();
+		final String apiJson = mapper.writeValueAsString(getApi());
+		when(argumentParser.getJson()).thenReturn(apiJson);
+		when(argumentParser.getNameNode()).thenReturn("file://tmp/test.seq");
+		when(argumentParser.getHdfsPath()).thenReturn("/tmp/file.seq");
+		when(argumentParser.getUser()).thenReturn("sandro");
+		when(argumentParser.getWorkflowId()).thenReturn("sandro");
+		when(argumentParser.getRabbitOngoingQueue()).thenReturn("sandro");
+
+		when(messageManager.sendMessage(any(Message.class), anyString(), anyBoolean(), anyBoolean())).thenAnswer(a -> {
+			System.out.println("sent message: " + a.getArguments()[0]);
+			return true;
+		});
+		when(messageManager.sendMessage(any(Message.class), anyString())).thenAnswer(a -> {
+			System.out.println("Called");
+			return true;
+		});
+		worker = new DnetCollectorWorker(new CollectorPluginFactory(), argumentParser, messageManager);
+	}
+
+	@After
+	public void dropDown() {
+		File f = new File("/tmp/test.seq");
+		f.delete();
+	}
 
 	@Test
 	public void testFindPlugin() throws Exception {
-		final CollectorPluginEnumerator collectorPluginEnumerator = ctx.getBean(CollectorPluginEnumerator.class);
+		final CollectorPluginFactory collectorPluginEnumerator = new CollectorPluginFactory();
 		assertNotNull(collectorPluginEnumerator.getPluginByProtocol("oai"));
 		assertNotNull(collectorPluginEnumerator.getPluginByProtocol("OAI"));
 	}
 
 	@Test
 	public void testCollectionOAI() throws Exception {
 		final ApiDescriptor api = new ApiDescriptor();

@@ -36,10 +67,22 @@ public class DnetCollectorWorkerApplicationTests {
 		api.setProtocol("oai");
 		api.setBaseUrl("http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai");
 		api.getParams().put("format", "oai_dc");
 		ObjectMapper mapper = new ObjectMapper();
-		System.out.println(mapper.writeValueAsString(api));
+		assertNotNull(mapper.writeValueAsString(api));
+	}
+
+	@Test
+	public void testFeeding() throws Exception {
+		worker.collect();
+	}
+
+	private ApiDescriptor getApi() {
+		final ApiDescriptor api = new ApiDescriptor();
+		api.setId("oai");
+		api.setProtocol("oai");
+		api.setBaseUrl("http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai");
+		api.getParams().put("format", "oai_dc");
+		return api;
 	}
 }
@@ -0,0 +1,14 @@
+### Root Level ###
+log4j.rootLogger=WARN, CONSOLE
+
+### Configuration for the CONSOLE appender ###
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c - %m%n
+
+org.apache.cxf.Logger=org.apache.cxf.common.logging.Log4jLogger
+
+### Application Level ###
+log4j.logger.eu.dnetlib=INFO
+log4j.logger.eu.dnetlib.collector.worker.DnetCollectorWorker=DEBUG