forked from antonis.lempesis/dnet-hadoop
code formatting
This commit is contained in:
parent
6d0b11252e
commit
c6b028f2af
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.common;
|
package eu.dnetlib.dhp.schema.common;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
|
@ -73,19 +73,6 @@ public class PrepareMergedRelationJob {
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath);
|
.json(outputPath);
|
||||||
// relation.createOrReplaceTempView("relation");
|
|
||||||
//
|
|
||||||
// spark
|
|
||||||
// .sql(
|
|
||||||
// "Select * from relation " +
|
|
||||||
// "where relclass = 'merges' " +
|
|
||||||
// "and datainfo.deletedbyinference = false")
|
|
||||||
// .as(Encoders.bean(Relation.class))
|
|
||||||
// .toJSON()
|
|
||||||
// .write()
|
|
||||||
// .mode(SaveMode.Overwrite)
|
|
||||||
// .option("compression", "gzip")
|
|
||||||
// .text(outputPath);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static org.apache.spark.sql.Dataset<Relation> readRelations(
|
public static org.apache.spark.sql.Dataset<Relation> readRelations(
|
||||||
|
|
|
@ -65,8 +65,7 @@ public class ReadBlacklistFromDB implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final String sql, final Function<ResultSet, List<Relation>> producer)
|
public void execute(final String sql, final Function<ResultSet, List<Relation>> producer) throws Exception {
|
||||||
throws Exception {
|
|
||||||
|
|
||||||
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r));
|
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r));
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,7 @@ public class SparkRemoveBlacklistedRelationJob {
|
||||||
.joinWith(
|
.joinWith(
|
||||||
mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")),
|
mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")),
|
||||||
"left_outer")
|
"left_outer")
|
||||||
.map(c -> {
|
.map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(c._2())
|
.ofNullable(c._2())
|
||||||
.ifPresent(mr -> c._1().setSource(mr.getSource()));
|
.ifPresent(mr -> c._1().setSource(mr.getSource()));
|
||||||
|
@ -95,7 +95,7 @@ public class SparkRemoveBlacklistedRelationJob {
|
||||||
.joinWith(
|
.joinWith(
|
||||||
mergesRelation, dedupSource.col("target").equalTo(mergesRelation.col("target")),
|
mergesRelation, dedupSource.col("target").equalTo(mergesRelation.col("target")),
|
||||||
"left_outer")
|
"left_outer")
|
||||||
.map(c -> {
|
.map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(c._2())
|
.ofNullable(c._2())
|
||||||
.ifPresent(mr -> c._1().setTarget(mr.getSource()));
|
.ifPresent(mr -> c._1().setTarget(mr.getSource()));
|
||||||
|
@ -107,7 +107,6 @@ public class SparkRemoveBlacklistedRelationJob {
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.json(blacklistPath + "/deduped");
|
.json(blacklistPath + "/deduped");
|
||||||
|
|
||||||
|
|
||||||
inputRelation
|
inputRelation
|
||||||
.joinWith(
|
.joinWith(
|
||||||
dedupBL, (inputRelation
|
dedupBL, (inputRelation
|
||||||
|
@ -118,7 +117,7 @@ public class SparkRemoveBlacklistedRelationJob {
|
||||||
.col("target")
|
.col("target")
|
||||||
.equalTo(dedupBL.col("target")))),
|
.equalTo(dedupBL.col("target")))),
|
||||||
"left_outer")
|
"left_outer")
|
||||||
.map(c -> {
|
.map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
|
||||||
Relation ir = c._1();
|
Relation ir = c._1();
|
||||||
Optional<Relation> obl = Optional.ofNullable(c._2());
|
Optional<Relation> obl = Optional.ofNullable(c._2());
|
||||||
if (obl.isPresent()) {
|
if (obl.isPresent()) {
|
||||||
|
@ -127,17 +126,14 @@ public class SparkRemoveBlacklistedRelationJob {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ir;
|
return ir;
|
||||||
|
|
||||||
}, Encoders.bean(Relation.class))
|
}, Encoders.bean(Relation.class))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath);
|
.json(outputPath);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static org.apache.spark.sql.Dataset<Relation> readRelations(
|
public static org.apache.spark.sql.Dataset<Relation> readRelations(
|
||||||
SparkSession spark, String inputPath) {
|
SparkSession spark, String inputPath) {
|
||||||
return spark
|
return spark
|
||||||
|
|
|
@ -19,6 +19,7 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
public class BlackListTest {
|
public class BlackListTest {
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag;
|
package eu.dnetlib.dhp.bulktag;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import com.google.gson.Gson;
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import java.util.Optional;
|
||||||
import eu.dnetlib.dhp.bulktag.community.*;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
@ -16,9 +15,12 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.Optional;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.*;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class SparkBulkTagJob {
|
public class SparkBulkTagJob {
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
/** Created by miriam on 01/08/2018. */
|
/** Created by miriam on 01/08/2018. */
|
||||||
public class Community implements Serializable {
|
public class Community implements Serializable {
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,6 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.common.collect.Maps;
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.GsonBuilder;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -17,6 +8,17 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.GsonBuilder;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||||
|
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class CommunityConfiguration implements Serializable {
|
public class CommunityConfiguration implements Serializable {
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import java.io.StringReader;
|
||||||
import com.google.common.collect.Maps;
|
import java.util.ArrayList;
|
||||||
import com.google.gson.Gson;
|
import java.util.List;
|
||||||
import com.google.gson.GsonBuilder;
|
import java.util.Map;
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -17,10 +14,15 @@ import org.dom4j.DocumentException;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import com.google.common.collect.Lists;
|
||||||
import java.util.ArrayList;
|
import com.google.common.collect.Maps;
|
||||||
import java.util.List;
|
import com.google.gson.Gson;
|
||||||
import java.util.Map;
|
import com.google.gson.GsonBuilder;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||||
|
|
||||||
/** Created by miriam on 03/08/2018. */
|
/** Created by miriam on 03/08/2018. */
|
||||||
public class CommunityConfigurationFactory {
|
public class CommunityConfigurationFactory {
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
|
||||||
public class Constraint implements Serializable {
|
public class Constraint implements Serializable {
|
||||||
private String verb;
|
private String verb;
|
||||||
private String field;
|
private String field;
|
||||||
|
|
|
@ -1,12 +1,6 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.reflect.TypeToken;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.lang.reflect.Type;
|
import java.lang.reflect.Type;
|
||||||
|
@ -14,6 +8,14 @@ import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.reflect.TypeToken;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class Constraints implements Serializable {
|
public class Constraints implements Serializable {
|
||||||
private static final Log log = LogFactory.getLog(Constraints.class);
|
private static final Log log = LogFactory.getLog(Constraints.class);
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
/** Created by miriam on 03/08/2018. */
|
/** Created by miriam on 03/08/2018. */
|
||||||
public class Pair<A, B> implements Serializable {
|
public class Pair<A, B> implements Serializable {
|
||||||
private A fst;
|
private A fst;
|
||||||
|
|
|
@ -1,13 +1,15 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import java.io.Serializable;
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
|
||||||
/** Created by miriam on 01/08/2018. */
|
/** Created by miriam on 01/08/2018. */
|
||||||
public class Provider implements Serializable {
|
public class Provider implements Serializable {
|
||||||
|
|
|
@ -1,13 +1,15 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
import com.google.common.base.Joiner;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class QueryInformationSystem {
|
public class QueryInformationSystem {
|
||||||
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
||||||
|
|
|
@ -1,19 +1,21 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
|
||||||
import com.jayway.jsonpath.DocumentContext;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import com.jayway.jsonpath.JsonPath;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
/** Created by miriam on 02/08/2018. */
|
/** Created by miriam on 02/08/2018. */
|
||||||
public class ResultTagger implements Serializable {
|
public class ResultTagger implements Serializable {
|
||||||
|
|
|
@ -1,16 +1,17 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.reflect.TypeToken;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.Type;
|
import java.lang.reflect.Type;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.reflect.TypeToken;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
|
||||||
public class SelectionConstraints implements Serializable {
|
public class SelectionConstraints implements Serializable {
|
||||||
private List<Constraints> criteria;
|
private List<Constraints> criteria;
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.community;
|
package eu.dnetlib.dhp.bulktag.community;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import java.io.Serializable;
|
||||||
|
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
/** Created by miriam on 01/08/2018. */
|
/** Created by miriam on 01/08/2018. */
|
||||||
public class ZenodoCommunity implements Serializable {
|
public class ZenodoCommunity implements Serializable {
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.criteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import com.google.gson.*;
|
|
||||||
|
|
||||||
import java.lang.reflect.Type;
|
import java.lang.reflect.Type;
|
||||||
|
|
||||||
|
import com.google.gson.*;
|
||||||
|
|
||||||
public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
|
public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
|
||||||
|
|
||||||
private static final String CLASSNAME = "CLASSNAME";
|
private static final String CLASSNAME = "CLASSNAME";
|
||||||
|
|
|
@ -1,16 +1,16 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag.criteria;
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import io.github.classgraph.ClassGraph;
|
|
||||||
import io.github.classgraph.ClassInfo;
|
|
||||||
import io.github.classgraph.ClassInfoList;
|
|
||||||
import io.github.classgraph.ScanResult;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import io.github.classgraph.ClassGraph;
|
||||||
|
import io.github.classgraph.ClassInfo;
|
||||||
|
import io.github.classgraph.ClassInfoList;
|
||||||
|
import io.github.classgraph.ScanResult;
|
||||||
|
|
||||||
public class VerbResolver implements Serializable {
|
public class VerbResolver implements Serializable {
|
||||||
private Map<String, Class<Selection>> map = null; // = new HashMap<>();
|
private Map<String, Class<Selection>> map = null; // = new HashMap<>();
|
||||||
private final ClassGraph classgraph = new ClassGraph();
|
private final ClassGraph classgraph = new ClassGraph();
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag;
|
package eu.dnetlib.dhp.bulktag;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
import java.io.IOException;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
import java.nio.file.Files;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -18,15 +19,15 @@ import org.junit.jupiter.api.AfterAll;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
|
|
||||||
public class BulkTagJobTest {
|
public class BulkTagJobTest {
|
||||||
|
|
||||||
|
@ -34,8 +35,7 @@ public class BulkTagJobTest {
|
||||||
|
|
||||||
public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
|
public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";
|
||||||
|
|
||||||
public static final String pathMap =
|
public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\","
|
||||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
+ " \"title\" : \"$['title'][*]['value']\","
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||||
|
@ -97,7 +97,8 @@ public class BulkTagJobTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
|
"-sourcePath",
|
||||||
|
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
|
||||||
"-taggingConf", taggingConf,
|
"-taggingConf", taggingConf,
|
||||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||||
"-outputPath", workingDir.toString() + "/dataset",
|
"-outputPath", workingDir.toString() + "/dataset",
|
||||||
|
|
|
@ -1,21 +1,23 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag;
|
package eu.dnetlib.dhp.bulktag;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import java.io.IOException;
|
||||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory;
|
import java.util.*;
|
||||||
import eu.dnetlib.dhp.bulktag.community.Constraint;
|
|
||||||
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import com.google.gson.Gson;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.*;
|
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.Constraint;
|
||||||
|
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||||
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
|
||||||
/** Created by miriam on 03/08/2018. */
|
/** Created by miriam on 03/08/2018. */
|
||||||
public class CommunityConfigurationFactoryTest {
|
public class CommunityConfigurationFactoryTest {
|
||||||
|
|
|
@ -103,7 +103,8 @@ public class CountryPropagationJobTest {
|
||||||
Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());
|
Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());
|
||||||
|
|
||||||
Dataset<String> countryExploded = verificationDs
|
Dataset<String> countryExploded = verificationDs
|
||||||
.flatMap((FlatMapFunction<Software, Country>) row -> row.getCountry().iterator(), Encoders.bean(Country.class))
|
.flatMap(
|
||||||
|
(FlatMapFunction<Software, Country>) row -> row.getCountry().iterator(), Encoders.bean(Country.class))
|
||||||
.map((MapFunction<Country, String>) c -> c.getClassid(), Encoders.STRING());
|
.map((MapFunction<Country, String>) c -> c.getClassid(), Encoders.STRING());
|
||||||
|
|
||||||
Assertions.assertEquals(9, countryExploded.count());
|
Assertions.assertEquals(9, countryExploded.count());
|
||||||
|
|
|
@ -79,7 +79,8 @@ public class ProjectPropagationJobTest {
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkResultToProjectThroughSemRelJob.main(
|
SparkResultToProjectThroughSemRelJob
|
||||||
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
@ -114,7 +115,8 @@ public class ProjectPropagationJobTest {
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkResultToProjectThroughSemRelJob.main(
|
SparkResultToProjectThroughSemRelJob
|
||||||
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
@ -143,8 +145,8 @@ public class ProjectPropagationJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
5,
|
5,
|
||||||
verificationDs
|
verificationDs
|
||||||
.filter((FilterFunction<Relation>) r ->
|
.filter(
|
||||||
r.getSource().startsWith("50")
|
(FilterFunction<Relation>) r -> r.getSource().startsWith("50")
|
||||||
&& r.getTarget().startsWith("40")
|
&& r.getTarget().startsWith("40")
|
||||||
&& r.getRelClass().equals("isProducedBy"))
|
&& r.getRelClass().equals("isProducedBy"))
|
||||||
.count());
|
.count());
|
||||||
|
@ -152,8 +154,8 @@ public class ProjectPropagationJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
5,
|
5,
|
||||||
verificationDs
|
verificationDs
|
||||||
.filter((FilterFunction<Relation>) r ->
|
.filter(
|
||||||
r.getSource().startsWith("40")
|
(FilterFunction<Relation>) r -> r.getSource().startsWith("40")
|
||||||
&& r.getTarget().startsWith("50")
|
&& r.getTarget().startsWith("50")
|
||||||
&& r.getRelClass().equals("produces"))
|
&& r.getRelClass().equals("produces"))
|
||||||
.count());
|
.count());
|
||||||
|
@ -185,7 +187,8 @@ public class ProjectPropagationJobTest {
|
||||||
.getResource(
|
.getResource(
|
||||||
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
"/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkResultToProjectThroughSemRelJob.main(
|
SparkResultToProjectThroughSemRelJob
|
||||||
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
@ -217,8 +220,8 @@ public class ProjectPropagationJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
4,
|
4,
|
||||||
verificationDs
|
verificationDs
|
||||||
.filter((FilterFunction<Relation>) r ->
|
.filter(
|
||||||
r.getSource().startsWith("50")
|
(FilterFunction<Relation>) r -> r.getSource().startsWith("50")
|
||||||
&& r.getTarget().startsWith("40")
|
&& r.getTarget().startsWith("40")
|
||||||
&& r.getRelClass().equals("isProducedBy"))
|
&& r.getRelClass().equals("isProducedBy"))
|
||||||
.count());
|
.count());
|
||||||
|
@ -226,8 +229,8 @@ public class ProjectPropagationJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
4,
|
4,
|
||||||
verificationDs
|
verificationDs
|
||||||
.filter((FilterFunction<Relation>) r ->
|
.filter(
|
||||||
r.getSource().startsWith("40")
|
(FilterFunction<Relation>) r -> r.getSource().startsWith("40")
|
||||||
&& r.getTarget().startsWith("50")
|
&& r.getTarget().startsWith("50")
|
||||||
&& r.getRelClass().equals("produces"))
|
&& r.getRelClass().equals("produces"))
|
||||||
.count());
|
.count());
|
||||||
|
|
|
@ -69,7 +69,8 @@ public class ResultToCommunityJobTest {
|
||||||
final String preparedInfoPath = getClass()
|
final String preparedInfoPath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo")
|
.getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkResultToCommunityFromOrganizationJob.main(
|
SparkResultToCommunityFromOrganizationJob
|
||||||
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
|
|
@ -72,12 +72,14 @@ public class ResultToOrganizationJobTest {
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String datasourceOrganizationPath = getClass()
|
final String datasourceOrganizationPath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String alreadyLinkedPath = getClass()
|
final String alreadyLinkedPath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked")
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkResultToOrganizationFromIstRepoJob.main(
|
SparkResultToOrganizationFromIstRepoJob
|
||||||
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
@ -112,12 +114,14 @@ public class ResultToOrganizationJobTest {
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String datasourceOrganizationPath = getClass()
|
final String datasourceOrganizationPath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String alreadyLinkedPath = getClass()
|
final String alreadyLinkedPath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked")
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkResultToOrganizationFromIstRepoJob.main(
|
SparkResultToOrganizationFromIstRepoJob
|
||||||
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
@ -175,12 +179,14 @@ public class ResultToOrganizationJobTest {
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix")
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String datasourceOrganizationPath = getClass()
|
final String datasourceOrganizationPath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
|
||||||
.getPath();
|
.getPath();
|
||||||
final String alreadyLinkedPath = getClass()
|
final String alreadyLinkedPath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked")
|
.getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked")
|
||||||
.getPath();
|
.getPath();
|
||||||
SparkResultToOrganizationFromIstRepoJob.main(
|
SparkResultToOrganizationFromIstRepoJob
|
||||||
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isTest", Boolean.TRUE.toString(),
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
|
Loading…
Reference in New Issue