BrBETA_dnet-hadoop/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/utils/SWHUtils.java

96 lines
2.9 KiB
Java

package eu.dnetlib.dhp.swh.utils;
import static eu.dnetlib.dhp.common.Constants.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
public class SWHUtils {
private static final Logger log = LoggerFactory.getLogger(SWHUtils.class);
public static HttpClientParams getClientParams(ArgumentApplicationParser argumentParser) {
final HttpClientParams clientParams = new HttpClientParams();
clientParams
.setMaxNumberOfRetry(
Optional
.ofNullable(argumentParser.get(MAX_NUMBER_OF_RETRY))
.map(Integer::parseInt)
.orElse(HttpClientParams._maxNumberOfRetry));
log.info("maxNumberOfRetry is {}", clientParams.getMaxNumberOfRetry());
clientParams
.setRequestDelay(
Optional
.ofNullable(argumentParser.get(REQUEST_DELAY))
.map(Integer::parseInt)
.orElse(HttpClientParams._requestDelay));
log.info("requestDelay is {}", clientParams.getRequestDelay());
clientParams
.setRetryDelay(
Optional
.ofNullable(argumentParser.get(RETRY_DELAY))
.map(Integer::parseInt)
.orElse(HttpClientParams._retryDelay));
log.info("retryDelay is {}", clientParams.getRetryDelay());
clientParams
.setRequestMethod(
Optional
.ofNullable(argumentParser.get(REQUEST_METHOD))
.orElse(HttpClientParams._requestMethod));
log.info("requestMethod is {}", clientParams.getRequestMethod());
return clientParams;
}
public static BufferedReader getFileReader(FileSystem fs, Path inputPath) throws IOException {
FSDataInputStream inputStream = fs.open(inputPath);
return new BufferedReader(
new InputStreamReader(inputStream, StandardCharsets.UTF_8));
}
public static SequenceFile.Writer getSequenceFileWriter(FileSystem fs, String outputPath) throws IOException {
return SequenceFile
.createWriter(
fs.getConf(),
SequenceFile.Writer.file(new Path(outputPath)),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
}
public static SequenceFile.Reader getSequenceFileReader(FileSystem fs, String inputPath) throws IOException {
Path filePath = new Path(inputPath);
SequenceFile.Reader.Option fileOption = SequenceFile.Reader.file(filePath);
return new SequenceFile.Reader(fs.getConf(), fileOption);
}
public static void appendToSequenceFile(SequenceFile.Writer fw, String keyStr, String valueStr) throws IOException {
Text key = new Text();
key.set(keyStr);
Text value = new Text();
value.set(valueStr);
fw.append(key, value);
}
}