forked from D-Net/dnet-hadoop
96 lines
2.9 KiB
Java
96 lines
2.9 KiB
Java
|
|
package eu.dnetlib.dhp.swh.utils;
|
|
|
|
import static eu.dnetlib.dhp.common.Constants.*;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.util.Optional;
|
|
|
|
import org.apache.hadoop.fs.FSDataInputStream;
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
import org.apache.hadoop.fs.Path;
|
|
import org.apache.hadoop.io.SequenceFile;
|
|
import org.apache.hadoop.io.Text;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
|
|
|
public class SWHUtils {
|
|
|
|
private static final Logger log = LoggerFactory.getLogger(SWHUtils.class);
|
|
|
|
public static HttpClientParams getClientParams(ArgumentApplicationParser argumentParser) {
|
|
|
|
final HttpClientParams clientParams = new HttpClientParams();
|
|
clientParams
|
|
.setMaxNumberOfRetry(
|
|
Optional
|
|
.ofNullable(argumentParser.get(MAX_NUMBER_OF_RETRY))
|
|
.map(Integer::parseInt)
|
|
.orElse(HttpClientParams._maxNumberOfRetry));
|
|
log.info("maxNumberOfRetry is {}", clientParams.getMaxNumberOfRetry());
|
|
|
|
clientParams
|
|
.setRequestDelay(
|
|
Optional
|
|
.ofNullable(argumentParser.get(REQUEST_DELAY))
|
|
.map(Integer::parseInt)
|
|
.orElse(HttpClientParams._requestDelay));
|
|
log.info("requestDelay is {}", clientParams.getRequestDelay());
|
|
|
|
clientParams
|
|
.setRetryDelay(
|
|
Optional
|
|
.ofNullable(argumentParser.get(RETRY_DELAY))
|
|
.map(Integer::parseInt)
|
|
.orElse(HttpClientParams._retryDelay));
|
|
log.info("retryDelay is {}", clientParams.getRetryDelay());
|
|
|
|
clientParams
|
|
.setRequestMethod(
|
|
Optional
|
|
.ofNullable(argumentParser.get(REQUEST_METHOD))
|
|
.orElse(HttpClientParams._requestMethod));
|
|
log.info("requestMethod is {}", clientParams.getRequestMethod());
|
|
|
|
return clientParams;
|
|
}
|
|
|
|
public static BufferedReader getFileReader(FileSystem fs, Path inputPath) throws IOException {
|
|
FSDataInputStream inputStream = fs.open(inputPath);
|
|
return new BufferedReader(
|
|
new InputStreamReader(inputStream, StandardCharsets.UTF_8));
|
|
}
|
|
|
|
public static SequenceFile.Writer getSequenceFileWriter(FileSystem fs, String outputPath) throws IOException {
|
|
return SequenceFile
|
|
.createWriter(
|
|
fs.getConf(),
|
|
SequenceFile.Writer.file(new Path(outputPath)),
|
|
SequenceFile.Writer.keyClass(Text.class),
|
|
SequenceFile.Writer.valueClass(Text.class));
|
|
}
|
|
|
|
public static SequenceFile.Reader getSequenceFileReader(FileSystem fs, String inputPath) throws IOException {
|
|
Path filePath = new Path(inputPath);
|
|
SequenceFile.Reader.Option fileOption = SequenceFile.Reader.file(filePath);
|
|
|
|
return new SequenceFile.Reader(fs.getConf(), fileOption);
|
|
}
|
|
|
|
public static void appendToSequenceFile(SequenceFile.Writer fw, String keyStr, String valueStr) throws IOException {
|
|
Text key = new Text();
|
|
key.set(keyStr);
|
|
|
|
Text value = new Text();
|
|
value.set(valueStr);
|
|
|
|
fw.append(key, value);
|
|
}
|
|
}
|