116 lines
3.6 KiB
Java
116 lines
3.6 KiB
Java
|
|
package eu.dnetlib.dhp.oa.graph.raw.common;
|
|
|
|
import java.io.Closeable;
|
|
import java.io.IOException;
|
|
import java.util.Arrays;
|
|
import java.util.Set;
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
import java.util.stream.Collectors;
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
import org.apache.commons.lang3.StringUtils;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.fs.Path;
|
|
import org.apache.hadoop.io.SequenceFile;
|
|
import org.apache.hadoop.io.Text;
|
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
import org.apache.http.client.methods.HttpGet;
|
|
import org.apache.http.impl.client.CloseableHttpClient;
|
|
import org.apache.http.impl.client.HttpClients;
|
|
|
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo;
|
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
|
|
public class AbstractMigrationApplication implements Closeable {
|
|
|
|
private final AtomicInteger counter = new AtomicInteger(0);
|
|
|
|
private final Text key = new Text();
|
|
|
|
private final Text value = new Text();
|
|
|
|
private final SequenceFile.Writer writer;
|
|
|
|
private final ObjectMapper objectMapper = new ObjectMapper();
|
|
|
|
private static final Log log = LogFactory.getLog(AbstractMigrationApplication.class);
|
|
|
|
protected AbstractMigrationApplication() { // ONLY FOR UNIT TEST
|
|
this.writer = null;
|
|
}
|
|
|
|
public AbstractMigrationApplication(final String hdfsPath) throws IOException {
|
|
|
|
log.info(String.format("Creating SequenceFile Writer, hdfsPath=%s", hdfsPath));
|
|
|
|
this.writer = SequenceFile
|
|
.createWriter(
|
|
getConf(),
|
|
SequenceFile.Writer.file(new Path(hdfsPath)),
|
|
SequenceFile.Writer.keyClass(Text.class),
|
|
SequenceFile.Writer.valueClass(Text.class));
|
|
}
|
|
|
|
/**
|
|
* Retrieves from the metadata store manager application the list of paths associated with mdstores characterized
|
|
* by he given format, layout, interpretation
|
|
* @param mdstoreManagerUrl the URL of the mdstore manager service
|
|
* @param format the mdstore format
|
|
* @param layout the mdstore layout
|
|
* @param interpretation the mdstore interpretation
|
|
* @return the set of hdfs paths
|
|
* @throws IOException in case of HTTP communication issues
|
|
*/
|
|
protected static Set<String> mdstorePaths(final String mdstoreManagerUrl,
|
|
final String format,
|
|
final String layout,
|
|
final String interpretation) throws IOException {
|
|
return DHPUtils.mdstorePaths(mdstoreManagerUrl, format, layout, interpretation, false);
|
|
}
|
|
|
|
private Configuration getConf() {
|
|
return new Configuration();
|
|
/*
|
|
* conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.hdfs.impl",
|
|
* org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl",
|
|
* org.apache.hadoop.fs.LocalFileSystem.class.getName()); System.setProperty("HADOOP_USER_NAME", hdfsUser);
|
|
* System.setProperty("hadoop.home.dir", "/"); FileSystem.get(URI.create(hdfsNameNode), conf);
|
|
*/
|
|
}
|
|
|
|
protected void emit(final String s, final String type) {
|
|
try {
|
|
key.set(counter.getAndIncrement() + ":" + type);
|
|
value.set(s);
|
|
writer.append(key, value);
|
|
} catch (final IOException e) {
|
|
throw new IllegalStateException(e);
|
|
}
|
|
}
|
|
|
|
protected void emitOaf(final Oaf oaf) {
|
|
try {
|
|
emit(objectMapper.writeValueAsString(oaf), oaf.getClass().getSimpleName().toLowerCase());
|
|
} catch (JsonProcessingException e) {
|
|
throw new IllegalStateException(e);
|
|
}
|
|
}
|
|
|
|
public ObjectMapper getObjectMapper() {
|
|
return objectMapper;
|
|
}
|
|
|
|
@Override
|
|
public void close() throws IOException {
|
|
writer.hflush();
|
|
writer.close();
|
|
}
|
|
}
|