forked from D-Net/dnet-hadoop
code formatting
This commit is contained in:
parent
d517c71458
commit
512e7b0170
|
@ -1,18 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import com.clearspring.analytics.util.Lists;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import java.util.*;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
|
|
||||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
@ -26,9 +18,19 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.*;
|
import com.clearspring.analytics.util.Lists;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
|
||||||
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
public class CopyHdfsOafApplication extends AbstractMigrationApplication {
|
public class CopyHdfsOafApplication extends AbstractMigrationApplication {
|
||||||
|
|
||||||
|
@ -96,12 +98,12 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication {
|
||||||
if (validPaths.length > 0) {
|
if (validPaths.length > 0) {
|
||||||
// load the dataset
|
// load the dataset
|
||||||
Dataset<Oaf> oaf = spark
|
Dataset<Oaf> oaf = spark
|
||||||
.read()
|
.read()
|
||||||
.load(validPaths)
|
.load(validPaths)
|
||||||
.as(Encoders.kryo(Oaf.class));
|
.as(Encoders.kryo(Oaf.class));
|
||||||
|
|
||||||
// dispatch each entity type individually in the respective graph subdirectory in append mode
|
// dispatch each entity type individually in the respective graph subdirectory in append mode
|
||||||
for(Map.Entry<String, Class> e : ModelSupport.oafTypes.entrySet()) {
|
for (Map.Entry<String, Class> e : ModelSupport.oafTypes.entrySet()) {
|
||||||
oaf
|
oaf
|
||||||
.filter((FilterFunction<Oaf>) o -> o.getClass().getSimpleName().toLowerCase().equals(e.getKey()))
|
.filter((FilterFunction<Oaf>) o -> o.getClass().getSimpleName().toLowerCase().equals(e.getKey()))
|
||||||
.map((MapFunction<Oaf, String>) OBJECT_MAPPER::writeValueAsString, Encoders.bean(e.getValue()))
|
.map((MapFunction<Oaf, String>) OBJECT_MAPPER::writeValueAsString, Encoders.bean(e.getValue()))
|
||||||
|
|
|
@ -8,7 +8,6 @@ import java.util.Set;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
|
@ -17,16 +16,17 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.io.SequenceFile;
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
import org.apache.http.client.methods.HttpGet;
|
import org.apache.http.client.methods.HttpGet;
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
import org.apache.http.impl.client.HttpClients;
|
import org.apache.http.impl.client.HttpClients;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
|
||||||
public class AbstractMigrationApplication implements Closeable {
|
public class AbstractMigrationApplication implements Closeable {
|
||||||
|
|
||||||
private final AtomicInteger counter = new AtomicInteger(0);
|
private final AtomicInteger counter = new AtomicInteger(0);
|
||||||
|
@ -68,9 +68,9 @@ public class AbstractMigrationApplication implements Closeable {
|
||||||
* @throws IOException in case of HTTP communication issues
|
* @throws IOException in case of HTTP communication issues
|
||||||
*/
|
*/
|
||||||
protected static Set<String> mdstorePaths(final String mdstoreManagerUrl,
|
protected static Set<String> mdstorePaths(final String mdstoreManagerUrl,
|
||||||
final String format,
|
final String format,
|
||||||
final String layout,
|
final String layout,
|
||||||
final String interpretation) throws IOException {
|
final String interpretation) throws IOException {
|
||||||
final String url = mdstoreManagerUrl + "/mdstores/";
|
final String url = mdstoreManagerUrl + "/mdstores/";
|
||||||
final ObjectMapper objectMapper = new ObjectMapper();
|
final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
|
|
||||||
|
@ -81,15 +81,15 @@ public class AbstractMigrationApplication implements Closeable {
|
||||||
final String json = IOUtils.toString(response.getEntity().getContent());
|
final String json = IOUtils.toString(response.getEntity().getContent());
|
||||||
final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
|
final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
|
||||||
return Arrays
|
return Arrays
|
||||||
.stream(mdstores)
|
.stream(mdstores)
|
||||||
.filter(md -> md.getFormat().equalsIgnoreCase(format))
|
.filter(md -> md.getFormat().equalsIgnoreCase(format))
|
||||||
.filter(md -> md.getLayout().equalsIgnoreCase(layout))
|
.filter(md -> md.getLayout().equalsIgnoreCase(layout))
|
||||||
.filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation))
|
.filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation))
|
||||||
.filter(md -> StringUtils.isNotBlank(md.getHdfsPath()))
|
.filter(md -> StringUtils.isNotBlank(md.getHdfsPath()))
|
||||||
.filter(md -> StringUtils.isNotBlank(md.getCurrentVersion()))
|
.filter(md -> StringUtils.isNotBlank(md.getCurrentVersion()))
|
||||||
.filter(md -> md.getSize() > 0)
|
.filter(md -> md.getSize() > 0)
|
||||||
.map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store")
|
.map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store")
|
||||||
.collect(Collectors.toSet());
|
.collect(Collectors.toSet());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue