forked from D-Net/dnet-hadoop
[Bypass Action Set] creation of unresolved entities
This commit is contained in:
parent
c371b23077
commit
935062edec
|
@ -0,0 +1,55 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
|
||||||
|
public class Constants {
|
||||||
|
|
||||||
|
public static final String UNREOSLVED_PREFIX = "unresolved:";
|
||||||
|
public static final String UNREOSLVED_POSTFIX_DOI = ":doi";
|
||||||
|
public static final String DOI = "doi";
|
||||||
|
|
||||||
|
public static final String UPDATE_DATA_INFO_TYPE = "update";
|
||||||
|
public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos";
|
||||||
|
public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE";
|
||||||
|
public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip";
|
||||||
|
|
||||||
|
public static final String FOS_CLASS_ID = "FOS";
|
||||||
|
public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification";
|
||||||
|
|
||||||
|
public final static String NULL = "NULL";
|
||||||
|
|
||||||
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <R> Dataset<R> readPath(
|
||||||
|
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||||
|
return spark
|
||||||
|
.read()
|
||||||
|
.textFile(inputPath)
|
||||||
|
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getUnresolvedDoiIndentifier(String doi) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append(UNREOSLVED_PREFIX).append(doi).append(UNREOSLVED_POSTFIX_DOI);
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,7 +1,9 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.fos;
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@ -29,17 +31,17 @@ public class GetFOSData implements Serializable {
|
||||||
.requireNonNull(
|
.requireNonNull(
|
||||||
GetFOSData.class
|
GetFOSData.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json"))));
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json"))));
|
||||||
|
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
// the path where the original fos csv file is stored
|
// the path where the original fos csv file is stored
|
||||||
final String inputPath = parser.get("inputPath");
|
final String sourcePath = parser.get("sourcePath");
|
||||||
log.info("inputPath {}", inputPath);
|
log.info("sourcePath {}", sourcePath);
|
||||||
|
|
||||||
// the path where to put the file as json
|
// the path where to put the file as json
|
||||||
final String outputFile = parser.get("outputFile");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputFile {}", outputFile);
|
log.info("outputPath {}", outputPath);
|
||||||
|
|
||||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||||
log.info("hdfsNameNode {}", hdfsNameNode);
|
log.info("hdfsNameNode {}", hdfsNameNode);
|
||||||
|
@ -58,7 +60,7 @@ public class GetFOSData implements Serializable {
|
||||||
|
|
||||||
FileSystem fileSystem = FileSystem.get(conf);
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
new GetFOSData().doRewrite(inputPath, outputFile, classForName, delimiter, fileSystem);
|
new GetFOSData().doRewrite(sourcePath, outputPath, classForName, delimiter, fileSystem);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,144 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*;
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.UPDATE_CLASS_NAME;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.hdfs.client.HdfsUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.BipDeserialize;
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.BipScore;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Measure;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
|
public class PrepareBipFinder implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class);
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static <I extends Result> void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
PrepareBipFinder.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip_prepare_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath {}: ", sourcePath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath {}: ", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
|
||||||
|
prepareResults(spark, sourcePath, outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath) {
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
||||||
|
.textFile(inputPath)
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class));
|
||||||
|
|
||||||
|
spark
|
||||||
|
.createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> {
|
||||||
|
BipScore bs = new BipScore();
|
||||||
|
bs.setId(key);
|
||||||
|
bs.setScoreList(entry.get(key));
|
||||||
|
return bs;
|
||||||
|
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class))
|
||||||
|
.map((MapFunction<BipScore, Result>) v -> {
|
||||||
|
Result r = new Result();
|
||||||
|
|
||||||
|
r.setId(getUnresolvedDoiIndentifier(v.getId()));
|
||||||
|
r.setMeasures(getMeasure(v));
|
||||||
|
return r;
|
||||||
|
}, Encoders.bean(Result.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "/bip");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Measure> getMeasure(BipScore value) {
|
||||||
|
return value
|
||||||
|
.getScoreList()
|
||||||
|
.stream()
|
||||||
|
.map(score -> {
|
||||||
|
Measure m = new Measure();
|
||||||
|
m.setId(score.getId());
|
||||||
|
m
|
||||||
|
.setUnit(
|
||||||
|
score
|
||||||
|
.getUnit()
|
||||||
|
.stream()
|
||||||
|
.map(unit -> {
|
||||||
|
KeyValue kv = new KeyValue();
|
||||||
|
kv.setValue(unit.getValue());
|
||||||
|
kv.setKey(unit.getKey());
|
||||||
|
kv
|
||||||
|
.setDataInfo(
|
||||||
|
OafMapperUtils
|
||||||
|
.dataInfo(
|
||||||
|
false,
|
||||||
|
UPDATE_DATA_INFO_TYPE,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
UPDATE_MEASURE_BIP_CLASS_ID,
|
||||||
|
UPDATE_CLASS_NAME,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||||
|
""));
|
||||||
|
return kv;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
return m;
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,30 +1,30 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.fos;
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*;
|
||||||
import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier;
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
import java.util.stream.Collectors;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
public class PrepareFOSSparkJob implements Serializable {
|
public class PrepareFOSSparkJob implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class);
|
private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class);
|
||||||
|
@ -35,7 +35,7 @@ public class PrepareFOSSparkJob implements Serializable {
|
||||||
.toString(
|
.toString(
|
||||||
PrepareFOSSparkJob.class
|
PrepareFOSSparkJob.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json"));
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
@ -55,7 +55,6 @@ public class PrepareFOSSparkJob implements Serializable {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
removeOutputDir(spark, outputPath);
|
|
||||||
distributeFOSdois(
|
distributeFOSdois(
|
||||||
spark,
|
spark,
|
||||||
sourcePath,
|
sourcePath,
|
||||||
|
@ -74,13 +73,60 @@ public class PrepareFOSSparkJob implements Serializable {
|
||||||
final String level3 = v.getLevel3();
|
final String level3 = v.getLevel3();
|
||||||
Arrays
|
Arrays
|
||||||
.stream(v.getDoi().split("\u0002"))
|
.stream(v.getDoi().split("\u0002"))
|
||||||
.forEach(d -> fosList.add(FOSDataModel.newInstance(getIdentifier(d), level1, level2, level3)));
|
.forEach(d -> fosList.add(FOSDataModel.newInstance(d, level1, level2, level3)));
|
||||||
return fosList.iterator();
|
return fosList.iterator();
|
||||||
}, Encoders.bean(FOSDataModel.class))
|
}, Encoders.bean(FOSDataModel.class))
|
||||||
|
.map((MapFunction<FOSDataModel, Result>) value -> {
|
||||||
|
Result r = new Result();
|
||||||
|
r.setId(getUnresolvedDoiIndentifier(value.getDoi()));
|
||||||
|
r.setSubject(getSubjects(value));
|
||||||
|
return r;
|
||||||
|
}, Encoders.bean(Result.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath);
|
.json(outputPath + "/fos");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<StructuredProperty> getSubjects(FOSDataModel fos) {
|
||||||
|
return Arrays
|
||||||
|
.asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3()))
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static StructuredProperty getSubject(String sbj) {
|
||||||
|
if (sbj.equals(NULL))
|
||||||
|
return null;
|
||||||
|
StructuredProperty sp = new StructuredProperty();
|
||||||
|
sp.setValue(sbj);
|
||||||
|
sp
|
||||||
|
.setQualifier(
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
FOS_CLASS_ID,
|
||||||
|
FOS_CLASS_NAME,
|
||||||
|
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
|
||||||
|
ModelConstants.DNET_SUBJECT_TYPOLOGIES));
|
||||||
|
sp
|
||||||
|
.setDataInfo(
|
||||||
|
OafMapperUtils
|
||||||
|
.dataInfo(
|
||||||
|
false,
|
||||||
|
UPDATE_DATA_INFO_TYPE,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
UPDATE_SUBJECT_FOS_CLASS_ID,
|
||||||
|
UPDATE_CLASS_NAME,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||||
|
""));
|
||||||
|
|
||||||
|
return sp;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*;
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
public class SparkSaveUnresolved implements Serializable {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
PrepareFOSSparkJob.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/produce_unresolved_parameters.json"));
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath: {}", sourcePath);
|
||||||
|
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
saveUnresolved(
|
||||||
|
spark,
|
||||||
|
sourcePath,
|
||||||
|
|
||||||
|
outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void saveUnresolved(SparkSession spark, String sourcePath, String outputPath) {
|
||||||
|
|
||||||
|
spark
|
||||||
|
.read()
|
||||||
|
.textFile(sourcePath + "/*")
|
||||||
|
.map(
|
||||||
|
(MapFunction<String, Result>) l -> OBJECT_MAPPER.readValue(l, Result.class),
|
||||||
|
Encoders.bean(Result.class))
|
||||||
|
.groupByKey((MapFunction<Result, String>) r -> r.getId(), Encoders.STRING())
|
||||||
|
.mapGroups((MapGroupsFunction<String, Result, Result>) (k, it) -> {
|
||||||
|
Result ret = it.next();
|
||||||
|
it.forEachRemaining(r -> ret.mergeFrom(r));
|
||||||
|
return ret;
|
||||||
|
}, Encoders.bean(Result.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.model;
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.model;
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.model;
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.model;
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.model;
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
|
@ -6,8 +6,8 @@
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"paramName": "ip",
|
"paramName": "sp",
|
||||||
"paramLongName": "inputPath",
|
"paramLongName": "sourcePath",
|
||||||
"paramDescription": "the URL from where to get the programme file",
|
"paramDescription": "the URL from where to get the programme file",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
|
@ -23,5 +23,11 @@
|
||||||
"paramLongName": "hdfsNameNode",
|
"paramLongName": "hdfsNameNode",
|
||||||
"paramDescription": "the path used to store the HostedByMap",
|
"paramDescription": "the path used to store the HostedByMap",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "cfn",
|
||||||
|
"paramLongName": "classForName",
|
||||||
|
"paramDescription": "the path used to store the HostedByMap",
|
||||||
|
"paramRequired": true
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -0,0 +1,30 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveJdbcUrl</name>
|
||||||
|
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveDbName</name>
|
||||||
|
<value>openaire</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,155 @@
|
||||||
|
<workflow-app name="UnresolvedEntities" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>fosPath</name>
|
||||||
|
<description>the input path of the resources to be extended</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>bipScorePath</name>
|
||||||
|
<description>the path where to find the bipFinder scores</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>outputPath</name>
|
||||||
|
<description>the path where to store the actionset</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozieActionShareLibForSpark2</name>
|
||||||
|
<description>oozie action sharelib for spark 2.*</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
<description>spark 2.* extra listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
<description>spark 2.* sql query execution listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to="prepareInfo"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
|
||||||
|
<fork name="prepareInfo">
|
||||||
|
<path start="prepareBip"/>
|
||||||
|
<path start="getFOS"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
|
<action name="prepareBip">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Produces the unresolved from bip finder!</name>
|
||||||
|
<class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${bipScorePath}</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/prepared/bip</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="join"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="getFOS">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetFOSData</main-class>
|
||||||
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--sourcePath</arg><arg>${fosPath}</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/input/fos</arg>
|
||||||
|
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="prepareFos"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="prepareFos">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Produces the unresolved from FOS!</name>
|
||||||
|
<class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/input/fos</arg>
|
||||||
|
|
||||||
|
<arg>--outputPath</arg><arg>${workingDir}/prepared/fos</arg>
|
||||||
|
|
||||||
|
</spark>
|
||||||
|
<ok to="join"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<join name="join" to="produceUnresolved"/>
|
||||||
|
|
||||||
|
<action name="produceUnresolved">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Saves the result produced for bip and fos by grouping results with the same id</name>
|
||||||
|
<class>eu.dnetlib.dhp.actionmanager.bipfinder.CollectAndSave</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${workingDir}/prepared</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
|
|
@ -6,8 +6,8 @@
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"paramName": "ip",
|
"paramName": "sp",
|
||||||
"paramLongName": "inputPath",
|
"paramLongName": "sourcePath",
|
||||||
"paramDescription": "the URL from where to get the programme file",
|
"paramDescription": "the URL from where to get the programme file",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
@ -16,17 +16,5 @@
|
||||||
"paramLongName": "outputPath",
|
"paramLongName": "outputPath",
|
||||||
"paramDescription": "the path of the new ActionSet",
|
"paramDescription": "the path of the new ActionSet",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "rtn",
|
|
||||||
"paramLongName": "resultTableName",
|
|
||||||
"paramDescription": "the path of the new ActionSet",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "fp",
|
|
||||||
"paramLongName": "fosPath",
|
|
||||||
"paramDescription": "the path of the new ActionSet",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -0,0 +1,250 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.LocalFileSystem;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.AfterAll;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
public class PrepareTest {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ProduceTest.class);
|
||||||
|
|
||||||
|
private static Path workingDir;
|
||||||
|
private static SparkSession spark;
|
||||||
|
private static LocalFileSystem fs;
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void beforeAll() throws IOException {
|
||||||
|
workingDir = Files.createTempDirectory(PrepareTest.class.getSimpleName());
|
||||||
|
|
||||||
|
fs = FileSystem.getLocal(new Configuration());
|
||||||
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.setAppName(ProduceTest.class.getSimpleName());
|
||||||
|
|
||||||
|
conf.setMaster("local[*]");
|
||||||
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
conf.set("hive.metastore.local", "true");
|
||||||
|
conf.set("spark.ui.enabled", "false");
|
||||||
|
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||||
|
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||||
|
|
||||||
|
spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(PrepareTest.class.getSimpleName())
|
||||||
|
.config(conf)
|
||||||
|
.getOrCreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterAll
|
||||||
|
public static void afterAll() throws IOException {
|
||||||
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
|
spark.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void bipPrepareTest() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
PrepareBipFinder
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"--sourcePath", sourcePath,
|
||||||
|
"--outputPath", workingDir.toString() + "/work"
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<Result> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/work/bip")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(86, tmp.count());
|
||||||
|
|
||||||
|
String doi1 = "unresolved:10.0000/096020199389707:doi";
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count());
|
||||||
|
Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getMeasures().size());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"6.34596412687e-09", tmp
|
||||||
|
.filter(r -> r.getId().equals(doi1))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getMeasures()
|
||||||
|
.stream()
|
||||||
|
.filter(sl -> sl.getId().equals("influence"))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0)
|
||||||
|
.getUnit()
|
||||||
|
.get(0)
|
||||||
|
.getValue());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"0.641151896994", tmp
|
||||||
|
.filter(r -> r.getId().equals(doi1))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getMeasures()
|
||||||
|
.stream()
|
||||||
|
.filter(sl -> sl.getId().equals("popularity_alt"))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0)
|
||||||
|
.getUnit()
|
||||||
|
.get(0)
|
||||||
|
.getValue());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"2.33375102921e-09", tmp
|
||||||
|
.filter(r -> r.getId().equals(doi1))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getMeasures()
|
||||||
|
.stream()
|
||||||
|
.filter(sl -> sl.getId().equals("popularity"))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0)
|
||||||
|
.getUnit()
|
||||||
|
.get(0)
|
||||||
|
.getValue());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getFOSFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv")
|
||||||
|
.getPath();
|
||||||
|
final String outputPath = workingDir.toString() + "/fos.json";
|
||||||
|
|
||||||
|
new GetFOSData()
|
||||||
|
.doRewrite(
|
||||||
|
sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel",
|
||||||
|
'\t', fs);
|
||||||
|
|
||||||
|
BufferedReader in = new BufferedReader(
|
||||||
|
new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath))));
|
||||||
|
|
||||||
|
String line;
|
||||||
|
int count = 0;
|
||||||
|
while ((line = in.readLine()) != null) {
|
||||||
|
FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class);
|
||||||
|
|
||||||
|
System.out.println(new ObjectMapper().writeValueAsString(fos));
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(38, count);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void fosPrepareTest() throws Exception {
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
PrepareFOSSparkJob
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"--sourcePath", sourcePath,
|
||||||
|
|
||||||
|
"-outputPath", workingDir.toString() + "/work"
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<Result> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/work/fos")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
||||||
|
|
||||||
|
String doi1 = "unresolved:10.3390/s18072310:doi";
|
||||||
|
|
||||||
|
assertEquals(50, tmp.count());
|
||||||
|
assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count());
|
||||||
|
assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equals(doi1))
|
||||||
|
.flatMap(r -> r.getSubject().iterator())
|
||||||
|
.map(sbj -> sbj.getValue())
|
||||||
|
.collect()
|
||||||
|
.contains("engineering and technology"));
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equals(doi1))
|
||||||
|
.flatMap(r -> r.getSubject().iterator())
|
||||||
|
.map(sbj -> sbj.getValue())
|
||||||
|
.collect()
|
||||||
|
.contains("nano-technology"));
|
||||||
|
assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equals(doi1))
|
||||||
|
.flatMap(r -> r.getSubject().iterator())
|
||||||
|
.map(sbj -> sbj.getValue())
|
||||||
|
.collect()
|
||||||
|
.contains("nanoscience & nanotechnology"));
|
||||||
|
|
||||||
|
String doi = "unresolved:10.1111/1365-2656.12831:doi";
|
||||||
|
assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count());
|
||||||
|
assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equals(doi))
|
||||||
|
.flatMap(r -> r.getSubject().iterator())
|
||||||
|
.map(sbj -> sbj.getValue())
|
||||||
|
.collect()
|
||||||
|
.contains("psychology and cognitive sciences"));
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equals(doi))
|
||||||
|
.flatMap(r -> r.getSubject().iterator())
|
||||||
|
.map(sbj -> sbj.getValue())
|
||||||
|
.collect()
|
||||||
|
.contains("social sciences"));
|
||||||
|
assertFalse(
|
||||||
|
tmp
|
||||||
|
.filter(r -> r.getId().equals(doi))
|
||||||
|
.flatMap(r -> r.getSubject().iterator())
|
||||||
|
.map(sbj -> sbj.getValue())
|
||||||
|
.collect()
|
||||||
|
.contains("NULL"));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,234 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.LocalFileSystem;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.AfterAll;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
|
public class ProduceTest {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ProduceTest.class);
|
||||||
|
|
||||||
|
private static Path workingDir;
|
||||||
|
private static SparkSession spark;
|
||||||
|
private static LocalFileSystem fs;
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
private static final String ID_PREFIX = "50|doi_________";
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void beforeAll() throws IOException {
|
||||||
|
workingDir = Files.createTempDirectory(ProduceTest.class.getSimpleName());
|
||||||
|
|
||||||
|
fs = FileSystem.getLocal(new Configuration());
|
||||||
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.setAppName(ProduceTest.class.getSimpleName());
|
||||||
|
|
||||||
|
conf.setMaster("local[*]");
|
||||||
|
conf.set("spark.driver.host", "localhost");
|
||||||
|
conf.set("hive.metastore.local", "true");
|
||||||
|
conf.set("spark.ui.enabled", "false");
|
||||||
|
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||||
|
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||||
|
|
||||||
|
spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(ProduceTest.class.getSimpleName())
|
||||||
|
.config(conf)
|
||||||
|
.getOrCreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterAll
|
||||||
|
public static void afterAll() throws IOException {
|
||||||
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
|
spark.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void produceTest() throws Exception {
|
||||||
|
|
||||||
|
final String bipPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
PrepareBipFinder
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"--sourcePath", bipPath,
|
||||||
|
"--outputPath", workingDir.toString() + "/work"
|
||||||
|
|
||||||
|
});
|
||||||
|
final String fosPath = getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json")
|
||||||
|
.getPath();
|
||||||
|
|
||||||
|
PrepareFOSSparkJob
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"--sourcePath", fosPath,
|
||||||
|
"-outputPath", workingDir.toString() + "/work"
|
||||||
|
});
|
||||||
|
|
||||||
|
SparkSaveUnresolved.main(new String[] {
|
||||||
|
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"--sourcePath", workingDir.toString() + "/work",
|
||||||
|
|
||||||
|
"-outputPath", workingDir.toString() + "/unresolved"
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<Result> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/unresolved")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(135, tmp.count());
|
||||||
|
|
||||||
|
Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")).count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
3, tmp
|
||||||
|
.filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getSubject()
|
||||||
|
.size());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
3, tmp
|
||||||
|
.filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getMeasures()
|
||||||
|
.size());
|
||||||
|
|
||||||
|
List<StructuredProperty> sbjs = tmp
|
||||||
|
.filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi"))
|
||||||
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
sbjs.forEach(sbj -> Assertions.assertEquals("FOS", sbj.getQualifier().getClassid()));
|
||||||
|
sbjs
|
||||||
|
.forEach(
|
||||||
|
sbj -> Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"Fields of Science and Technology classification", sbj.getQualifier().getClassname()));
|
||||||
|
sbjs
|
||||||
|
.forEach(
|
||||||
|
sbj -> Assertions
|
||||||
|
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid()));
|
||||||
|
sbjs
|
||||||
|
.forEach(
|
||||||
|
sbj -> Assertions
|
||||||
|
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename()));
|
||||||
|
|
||||||
|
sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
|
||||||
|
sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
|
||||||
|
sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible()));
|
||||||
|
sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust()));
|
||||||
|
sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()));
|
||||||
|
sbjs
|
||||||
|
.forEach(
|
||||||
|
sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid()));
|
||||||
|
sbjs
|
||||||
|
.forEach(
|
||||||
|
sbj -> Assertions
|
||||||
|
.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname()));
|
||||||
|
sbjs
|
||||||
|
.forEach(
|
||||||
|
sbj -> Assertions
|
||||||
|
.assertEquals(
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid()));
|
||||||
|
sbjs
|
||||||
|
.forEach(
|
||||||
|
sbj -> Assertions
|
||||||
|
.assertEquals(
|
||||||
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
|
sbj.getDataInfo().getProvenanceaction().getSchemename()));
|
||||||
|
|
||||||
|
sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("engineering and technology"));
|
||||||
|
sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nano-technology"));
|
||||||
|
sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nanoscience & nanotechnology"));
|
||||||
|
|
||||||
|
List<Measure> measures = tmp
|
||||||
|
.filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi"))
|
||||||
|
.flatMap(row -> row.getMeasures().iterator())
|
||||||
|
.collect();
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"7.5597134689e-09", measures
|
||||||
|
.stream()
|
||||||
|
.filter(mes -> mes.getId().equals("influence"))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0)
|
||||||
|
.getUnit()
|
||||||
|
.get(0)
|
||||||
|
.getValue());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"4.903880192", measures
|
||||||
|
.stream()
|
||||||
|
.filter(mes -> mes.getId().equals("popularity_alt"))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0)
|
||||||
|
.getUnit()
|
||||||
|
.get(0)
|
||||||
|
.getValue());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
"1.17977512835e-08", measures
|
||||||
|
.stream()
|
||||||
|
.filter(mes -> mes.getId().equals("popularity"))
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0)
|
||||||
|
.getUnit()
|
||||||
|
.get(0)
|
||||||
|
.getValue());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
49, tmp
|
||||||
|
.filter(row -> !row.getId().equals("unresolved:10.3390/s18072310:doi"))
|
||||||
|
.filter(row -> row.getSubject() != null)
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
85,
|
||||||
|
tmp
|
||||||
|
.filter(row -> !row.getId().equals("unresolved:10.3390/s18072310:doi"))
|
||||||
|
.filter(r -> r.getMeasures() != null)
|
||||||
|
.count());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,4 +1,4 @@
|
||||||
{"10.0000/000000": [{"id": "influence", "unit": [{"value": "7.5597134689e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "4.903880192", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17977512835e-08", "key": "score"}]}]}
|
{"10.3390/s18072310": [{"id": "influence", "unit": [{"value": "7.5597134689e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "4.903880192", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17977512835e-08", "key": "score"}]}]}
|
||||||
{"10.0000/096020199389707": [{"id": "influence", "unit": [{"value": "6.34596412687e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.641151896994", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.33375102921e-09", "key": "score"}]}]}
|
{"10.0000/096020199389707": [{"id": "influence", "unit": [{"value": "6.34596412687e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.641151896994", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.33375102921e-09", "key": "score"}]}]}
|
||||||
{"10.00000/jpmc.2017.106": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.39172290649e-09", "key": "score"}]}]}
|
{"10.00000/jpmc.2017.106": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.39172290649e-09", "key": "score"}]}]}
|
||||||
{"10.0000/9781845416881": [{"id": "influence", "unit": [{"value": "5.96492048955e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "1.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.12641925838e-08", "key": "score"}]}]}
|
{"10.0000/9781845416881": [{"id": "influence", "unit": [{"value": "5.96492048955e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "1.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.12641925838e-08", "key": "score"}]}]}
|
|
@ -1,17 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset;
|
|
||||||
|
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
|
||||||
|
|
||||||
public class Utils {
|
|
||||||
private static final String ID_PREFIX = "50|doi_________";
|
|
||||||
|
|
||||||
@NotNull
|
|
||||||
public static String getIdentifier(String d) {
|
|
||||||
return ID_PREFIX +
|
|
||||||
IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", d));
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,89 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.bipfinder;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier;
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.model.BipDeserialize;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.model.BipScore;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
|
|
||||||
public class PrepareBipFinder implements Serializable {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class);
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
|
|
||||||
public static <I extends Result> void main(String[] args) throws Exception {
|
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
|
||||||
.toString(
|
|
||||||
SparkUpdateBip.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json"));
|
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
|
||||||
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
final String inputPath = parser.get("inputPath");
|
|
||||||
log.info("inputPath {}: ", inputPath);
|
|
||||||
|
|
||||||
final String outputPath = parser.get("outputPath");
|
|
||||||
log.info("outputPath {}: ", outputPath);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
|
|
||||||
prepareResults(spark, inputPath, outputPath);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath) {
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
|
||||||
.textFile(inputPath)
|
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class));
|
|
||||||
|
|
||||||
spark
|
|
||||||
.createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> {
|
|
||||||
BipScore bs = new BipScore();
|
|
||||||
bs.setId(getIdentifier(key));
|
|
||||||
bs.setScoreList(entry.get(key));
|
|
||||||
return bs;
|
|
||||||
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class))
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,131 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.bipfinder;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.sql.Dataset;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.model.BipScore;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* created the Atomic Action for each tipe of results
|
|
||||||
*/
|
|
||||||
public class SparkUpdateBip implements Serializable {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class);
|
|
||||||
|
|
||||||
public static <I extends Result> void main(String[] args) throws Exception {
|
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
|
||||||
.toString(
|
|
||||||
SparkUpdateBip.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json"));
|
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
|
||||||
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
final String inputPath = parser.get("inputPath");
|
|
||||||
log.info("inputPath {}: ", inputPath);
|
|
||||||
|
|
||||||
final String outputPath = parser.get("outputPath");
|
|
||||||
log.info("outputPath {}: ", outputPath);
|
|
||||||
|
|
||||||
final String bipScorePath = parser.get("bipScorePath");
|
|
||||||
log.info("bipScorePath: {}", bipScorePath);
|
|
||||||
|
|
||||||
final String resultClassName = parser.get("resultTableName");
|
|
||||||
log.info("resultTableName: {}", resultClassName);
|
|
||||||
|
|
||||||
Class<I> inputClazz = (Class<I>) Class.forName(resultClassName);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> updateBipFinder(spark, inputPath, outputPath, bipScorePath, inputClazz)
|
|
||||||
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <I extends Result> void updateBipFinder(SparkSession spark, String inputPath, String outputPath,
|
|
||||||
String bipScorePath, Class<I> inputClazz) {
|
|
||||||
|
|
||||||
Dataset<I> results = readPath(spark, inputPath, inputClazz);
|
|
||||||
Dataset<BipScore> bipScores = readPath(spark, bipScorePath, BipScore.class);
|
|
||||||
|
|
||||||
results
|
|
||||||
.joinWith(bipScores, results.col("id").equalTo(bipScores.col("id")), "left")
|
|
||||||
.map((MapFunction<Tuple2<I, BipScore>, I>) value -> {
|
|
||||||
if (!Optional.ofNullable(value._2()).isPresent()) {
|
|
||||||
return value._1();
|
|
||||||
}
|
|
||||||
value._1().setMeasures(getMeasure(value._2()));
|
|
||||||
return value._1();
|
|
||||||
}, Encoders.bean(inputClazz))
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath + "/bip");
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<Measure> getMeasure(BipScore value) {
|
|
||||||
return value
|
|
||||||
.getScoreList()
|
|
||||||
.stream()
|
|
||||||
.map(score -> {
|
|
||||||
Measure m = new Measure();
|
|
||||||
m.setId(score.getId());
|
|
||||||
m
|
|
||||||
.setUnit(
|
|
||||||
score
|
|
||||||
.getUnit()
|
|
||||||
.stream()
|
|
||||||
.map(unit -> {
|
|
||||||
KeyValue kv = new KeyValue();
|
|
||||||
kv.setValue(unit.getValue());
|
|
||||||
kv.setKey(unit.getKey());
|
|
||||||
kv
|
|
||||||
.setDataInfo(
|
|
||||||
getDataInfo(
|
|
||||||
UPDATE_DATA_INFO_TYPE,
|
|
||||||
UPDATE_MEASURE_BIP_CLASS_ID,
|
|
||||||
UPDATE_CLASS_NAME,
|
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS, ""));
|
|
||||||
return kv;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
return m;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,121 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.fos;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.sql.Dataset;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
public class SparkUpdateFOS implements Serializable {
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkUpdateFOS.class);
|
|
||||||
|
|
||||||
public static <I extends Result> void main(String[] args) throws Exception {
|
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
|
||||||
.toString(
|
|
||||||
SparkUpdateFOS.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json"));
|
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
|
||||||
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
final String inputPath = parser.get("inputPath");
|
|
||||||
log.info("inputPath {}: ", inputPath);
|
|
||||||
|
|
||||||
final String outputPath = parser.get("outputPath");
|
|
||||||
log.info("outputPath {}: ", outputPath);
|
|
||||||
|
|
||||||
final String fosPath = parser.get("fosPath");
|
|
||||||
log.info("fosPath: {}", fosPath);
|
|
||||||
|
|
||||||
final String resultClassName = parser.get("resultTableName");
|
|
||||||
log.info("resultTableName: {}", resultClassName);
|
|
||||||
|
|
||||||
Class<I> inputClazz = (Class<I>) Class.forName(resultClassName);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> updateFos(spark, inputPath, outputPath, fosPath, inputClazz)
|
|
||||||
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <I extends Result> void updateFos(SparkSession spark, String inputPath, String outputPath,
|
|
||||||
String fosPath, Class<I> inputClazz) {
|
|
||||||
|
|
||||||
Dataset<I> results = readPath(spark, inputPath, inputClazz);
|
|
||||||
Dataset<FOSDataModel> fosDataModelDataset = readPath(spark, fosPath, FOSDataModel.class);
|
|
||||||
|
|
||||||
results
|
|
||||||
.joinWith(fosDataModelDataset, results.col("id").equalTo(fosDataModelDataset.col("doi")), "left")
|
|
||||||
.map((MapFunction<Tuple2<I, FOSDataModel>, I>) value -> {
|
|
||||||
if (!Optional.ofNullable(value._2()).isPresent()) {
|
|
||||||
return value._1();
|
|
||||||
}
|
|
||||||
value._1().getSubject().addAll(getSubjects(value._2()));
|
|
||||||
return value._1();
|
|
||||||
}, Encoders.bean(inputClazz))
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<StructuredProperty> getSubjects(FOSDataModel fos) {
|
|
||||||
return Arrays
|
|
||||||
.asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3()))
|
|
||||||
.stream()
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
private static StructuredProperty getSubject(String sbj) {
|
|
||||||
if (sbj.equals(NULL))
|
|
||||||
return null;
|
|
||||||
StructuredProperty sp = new StructuredProperty();
|
|
||||||
sp.setValue(sbj);
|
|
||||||
sp.setQualifier(getQualifier(FOS_CLASS_ID, FOS_CLASS_NAME, ModelConstants.DNET_SUBJECT_TYPOLOGIES));
|
|
||||||
sp
|
|
||||||
.setDataInfo(
|
|
||||||
getDataInfo(
|
|
||||||
UPDATE_DATA_INFO_TYPE,
|
|
||||||
UPDATE_SUBJECT_FOS_CLASS_ID,
|
|
||||||
UPDATE_CLASS_NAME,
|
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS, ""));
|
|
||||||
return sp;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,93 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.opencitations;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.zip.GZIPOutputStream;
|
|
||||||
import java.util.zip.ZipEntry;
|
|
||||||
import java.util.zip.ZipInputStream;
|
|
||||||
|
|
||||||
import org.apache.commons.cli.ParseException;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
|
|
||||||
public class GetOpenCitationsRefs implements Serializable {
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(GetOpenCitationsRefs.class);
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws IOException, ParseException {
|
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
|
||||||
IOUtils
|
|
||||||
.toString(
|
|
||||||
Objects
|
|
||||||
.requireNonNull(
|
|
||||||
GetOpenCitationsRefs.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/bypassactionset/opencitations/input_parameters.json"))));
|
|
||||||
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
final String[] inputFile = parser.get("inputFile").split(";");
|
|
||||||
log.info("inputFile {}", inputFile.toString());
|
|
||||||
|
|
||||||
final String workingPath = parser.get("workingPath");
|
|
||||||
log.info("workingPath {}", workingPath);
|
|
||||||
|
|
||||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
|
||||||
log.info("hdfsNameNode {}", hdfsNameNode);
|
|
||||||
|
|
||||||
Configuration conf = new Configuration();
|
|
||||||
conf.set("fs.defaultFS", hdfsNameNode);
|
|
||||||
|
|
||||||
FileSystem fileSystem = FileSystem.get(conf);
|
|
||||||
|
|
||||||
GetOpenCitationsRefs ocr = new GetOpenCitationsRefs();
|
|
||||||
|
|
||||||
for (String file : inputFile) {
|
|
||||||
ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private void doExtract(String inputFile, String workingPath, FileSystem fileSystem)
|
|
||||||
throws IOException {
|
|
||||||
|
|
||||||
final Path path = new Path(inputFile);
|
|
||||||
|
|
||||||
FSDataInputStream oc_zip = fileSystem.open(path);
|
|
||||||
|
|
||||||
int count = 1;
|
|
||||||
try (ZipInputStream zis = new ZipInputStream(oc_zip)) {
|
|
||||||
ZipEntry entry = null;
|
|
||||||
while ((entry = zis.getNextEntry()) != null) {
|
|
||||||
|
|
||||||
if (!entry.isDirectory()) {
|
|
||||||
String fileName = entry.getName();
|
|
||||||
fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count;
|
|
||||||
count++;
|
|
||||||
try (
|
|
||||||
FSDataOutputStream out = fileSystem
|
|
||||||
.create(new Path(workingPath + "/COCI/" + fileName + ".gz"));
|
|
||||||
GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
|
|
||||||
|
|
||||||
IOUtils.copy(zis, gzipOs);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,150 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset.opencitations;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import org.apache.commons.cli.ParseException;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
|
||||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
|
||||||
|
|
||||||
public class SparkUpdateOCRels implements Serializable {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkUpdateOCRels.class);
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws IOException, ParseException {
|
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
|
||||||
IOUtils
|
|
||||||
.toString(
|
|
||||||
Objects
|
|
||||||
.requireNonNull(
|
|
||||||
SparkUpdateOCRels.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json"))));
|
|
||||||
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
final String inputPath = parser.get("inputPath");
|
|
||||||
log.info("inputPath {}", inputPath.toString());
|
|
||||||
|
|
||||||
final String outputPath = parser.get("outputPath");
|
|
||||||
log.info("outputPath {}", outputPath);
|
|
||||||
|
|
||||||
final boolean shouldDuplicateRels = Optional
|
|
||||||
.ofNullable(parser.get("shouldDuplicateRels"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.FALSE);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> addOCRelations(spark, inputPath, outputPath, shouldDuplicateRels));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void addOCRelations(SparkSession spark, String inputPath, String outputPath,
|
|
||||||
boolean shouldDuplicateRels) {
|
|
||||||
spark
|
|
||||||
.sqlContext()
|
|
||||||
.createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING())
|
|
||||||
.flatMap(
|
|
||||||
(FlatMapFunction<String, Relation>) value -> createRelation(value, shouldDuplicateRels).iterator(),
|
|
||||||
Encoders.bean(Relation.class))
|
|
||||||
.filter((FilterFunction<Relation>) value -> value != null)
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Append)
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(outputPath);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<Relation> createRelation(String value, boolean duplicate) {
|
|
||||||
String[] line = value.split(",");
|
|
||||||
if (!line[1].startsWith("10.")) {
|
|
||||||
return new ArrayList<>();
|
|
||||||
}
|
|
||||||
List<Relation> relationList = new ArrayList<>();
|
|
||||||
|
|
||||||
String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[1]));
|
|
||||||
final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[2]));
|
|
||||||
|
|
||||||
relationList
|
|
||||||
.addAll(
|
|
||||||
getRelations(
|
|
||||||
citing,
|
|
||||||
cited));
|
|
||||||
|
|
||||||
if (duplicate && line[1].endsWith(REF_DOI)) {
|
|
||||||
citing = ID_PREFIX + IdentifierFactory
|
|
||||||
.md5(CleaningFunctions.normalizePidValue(DOI, line[1].substring(0, line[1].indexOf(REF_DOI))));
|
|
||||||
relationList.addAll(getRelations(citing, cited));
|
|
||||||
}
|
|
||||||
|
|
||||||
return relationList;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Collection<Relation> getRelations(String citing, String cited) {
|
|
||||||
|
|
||||||
return Arrays
|
|
||||||
.asList(
|
|
||||||
getRelation(citing, cited, ModelConstants.CITES),
|
|
||||||
getRelation(cited, citing, ModelConstants.IS_CITED_BY));
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Relation getRelation(
|
|
||||||
String source,
|
|
||||||
String target,
|
|
||||||
String relclass) {
|
|
||||||
Relation r = new Relation();
|
|
||||||
r.setCollectedfrom(getCollectedFrom());
|
|
||||||
r.setSource(source);
|
|
||||||
r.setTarget(target);
|
|
||||||
r.setRelClass(relclass);
|
|
||||||
r.setRelType(ModelConstants.RESULT_RESULT);
|
|
||||||
r.setSubRelType(ModelConstants.CITATION);
|
|
||||||
r
|
|
||||||
.setDataInfo(
|
|
||||||
getDataInfo(
|
|
||||||
UPDATE_DATA_INFO_TYPE, OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME,
|
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS, OC_TRUST, false));
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static List<KeyValue> getCollectedFrom() {
|
|
||||||
KeyValue kv = new KeyValue();
|
|
||||||
kv.setKey(ModelConstants.OPENOCITATIONS_ID);
|
|
||||||
kv.setValue(ModelConstants.OPENOCITATIONS_NAME);
|
|
||||||
|
|
||||||
return Arrays.asList(kv);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,32 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName": "issm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "when true will stop SparkSession after job execution",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "ip",
|
|
||||||
"paramLongName": "inputPath",
|
|
||||||
"paramDescription": "the URL from where to get the programme file",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "o",
|
|
||||||
"paramLongName": "outputPath",
|
|
||||||
"paramDescription": "the path of the new ActionSet",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "rtn",
|
|
||||||
"paramLongName": "resultTableName",
|
|
||||||
"paramDescription": "the path of the new ActionSet",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "bsp",
|
|
||||||
"paramLongName": "bipScorePath",
|
|
||||||
"paramDescription": "the path of the new ActionSet",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
]
|
|
|
@ -1,250 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
|
||||||
import org.apache.commons.io.FileUtils;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.LocalFileSystem;
|
|
||||||
import org.apache.neethi.Assertion;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.junit.jupiter.api.AfterAll;
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.bipfinder.PrepareBipFinder;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.bipfinder.SparkUpdateBip;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.model.BipScore;
|
|
||||||
import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Measure;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
|
||||||
|
|
||||||
public class BipTest {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(FOSTest.class);
|
|
||||||
|
|
||||||
private static Path workingDir;
|
|
||||||
private static SparkSession spark;
|
|
||||||
private static LocalFileSystem fs;
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
private static final String ID_PREFIX = "50|doi_________";
|
|
||||||
|
|
||||||
@BeforeAll
|
|
||||||
public static void beforeAll() throws IOException {
|
|
||||||
workingDir = Files.createTempDirectory(BipTest.class.getSimpleName());
|
|
||||||
|
|
||||||
fs = FileSystem.getLocal(new Configuration());
|
|
||||||
log.info("using work dir {}", workingDir);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
conf.setAppName(FOSTest.class.getSimpleName());
|
|
||||||
|
|
||||||
conf.setMaster("local[*]");
|
|
||||||
conf.set("spark.driver.host", "localhost");
|
|
||||||
conf.set("hive.metastore.local", "true");
|
|
||||||
conf.set("spark.ui.enabled", "false");
|
|
||||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
|
||||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
|
||||||
|
|
||||||
spark = SparkSession
|
|
||||||
.builder()
|
|
||||||
.appName(CountryPropagationJobTest.class.getSimpleName())
|
|
||||||
.config(conf)
|
|
||||||
.getOrCreate();
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterAll
|
|
||||||
public static void afterAll() throws IOException {
|
|
||||||
FileUtils.deleteDirectory(workingDir.toFile());
|
|
||||||
spark.stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void prepareBipTest() throws Exception {
|
|
||||||
final String sourcePath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/bip/bip.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
PrepareBipFinder
|
|
||||||
.main(
|
|
||||||
new String[] {
|
|
||||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
|
||||||
"--inputPath", sourcePath,
|
|
||||||
"--outputPath", workingDir.toString() + "/remapDoi"
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<BipScore> tmp = sc
|
|
||||||
.textFile(workingDir.toString() + "/remapDoi")
|
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, BipScore.class));
|
|
||||||
|
|
||||||
Assertions.assertEquals(86, tmp.count());
|
|
||||||
// tmp.foreach(v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v)));
|
|
||||||
|
|
||||||
String doi1 = ID_PREFIX +
|
|
||||||
IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.0000/096020199389707"));
|
|
||||||
|
|
||||||
Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count());
|
|
||||||
Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getScoreList().size());
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
"6.34596412687e-09", tmp
|
|
||||||
.filter(r -> r.getId().equals(doi1))
|
|
||||||
.collect()
|
|
||||||
.get(0)
|
|
||||||
.getScoreList()
|
|
||||||
.stream()
|
|
||||||
.filter(sl -> sl.getId().equals("influence"))
|
|
||||||
.collect(Collectors.toList())
|
|
||||||
.get(0)
|
|
||||||
.getUnit()
|
|
||||||
.get(0)
|
|
||||||
.getValue());
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
"0.641151896994", tmp
|
|
||||||
.filter(r -> r.getId().equals(doi1))
|
|
||||||
.collect()
|
|
||||||
.get(0)
|
|
||||||
.getScoreList()
|
|
||||||
.stream()
|
|
||||||
.filter(sl -> sl.getId().equals("popularity_alt"))
|
|
||||||
.collect(Collectors.toList())
|
|
||||||
.get(0)
|
|
||||||
.getUnit()
|
|
||||||
.get(0)
|
|
||||||
.getValue());
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
"2.33375102921e-09", tmp
|
|
||||||
.filter(r -> r.getId().equals(doi1))
|
|
||||||
.collect()
|
|
||||||
.get(0)
|
|
||||||
.getScoreList()
|
|
||||||
.stream()
|
|
||||||
.filter(sl -> sl.getId().equals("popularity"))
|
|
||||||
.collect(Collectors.toList())
|
|
||||||
.get(0)
|
|
||||||
.getUnit()
|
|
||||||
.get(0)
|
|
||||||
.getValue());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void updateResult() throws Exception {
|
|
||||||
final String bipScorePath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
final String inputPath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
SparkUpdateBip
|
|
||||||
.main(
|
|
||||||
new String[] {
|
|
||||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
|
||||||
"--bipScorePath", bipScorePath,
|
|
||||||
"--inputPath", inputPath,
|
|
||||||
"--outputPath", workingDir.toString() + "/publication",
|
|
||||||
"--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication"
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<Publication> tmp = sc
|
|
||||||
.textFile(workingDir.toString() + "/publication/bip")
|
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
|
|
||||||
|
|
||||||
Assertions.assertEquals(6, tmp.count());
|
|
||||||
Assertions.assertEquals(0, tmp.filter(r -> r.getMeasures() != null).count());
|
|
||||||
tmp.foreach(r -> Assertions.assertEquals("publication", r.getResulttype().getClassid()));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void updateResultMatchCheckMeasures() throws Exception {
|
|
||||||
final String bipScorePath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
final String inputPath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
SparkUpdateBip
|
|
||||||
.main(
|
|
||||||
new String[] {
|
|
||||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
|
||||||
"--bipScorePath", bipScorePath,
|
|
||||||
"--inputPath", inputPath,
|
|
||||||
"--outputPath", workingDir.toString() + "/publication",
|
|
||||||
"--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication"
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<Publication> tmp = sc
|
|
||||||
.textFile(workingDir.toString() + "/publication/bip")
|
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
|
|
||||||
|
|
||||||
Assertions.assertEquals(6, tmp.count());
|
|
||||||
Assertions.assertEquals(1, tmp.filter(r -> r.getMeasures() != null).count());
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
1, tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")).count());
|
|
||||||
Assertions
|
|
||||||
.assertEquals(
|
|
||||||
1,
|
|
||||||
tmp
|
|
||||||
.filter(
|
|
||||||
r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")
|
|
||||||
&& r.getMeasures() != null)
|
|
||||||
.count());
|
|
||||||
Assertions.assertEquals(3, tmp
|
|
||||||
.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f"))
|
|
||||||
.collect()
|
|
||||||
.get(0)
|
|
||||||
.getMeasures().size());
|
|
||||||
|
|
||||||
Assertions.assertEquals("5.91019644836e-09",
|
|
||||||
tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f"))
|
|
||||||
.collect()
|
|
||||||
.get(0).getMeasures().stream().filter(m -> m.getId().equals("influence")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue());
|
|
||||||
Assertions.assertEquals("0.0",
|
|
||||||
tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f"))
|
|
||||||
.collect()
|
|
||||||
.get(0).getMeasures().stream().filter(m -> m.getId().equals("popularity_alt")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue());
|
|
||||||
Assertions.assertEquals("9.88840807598e-09",
|
|
||||||
tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f"))
|
|
||||||
.collect()
|
|
||||||
.get(0).getMeasures().stream().filter(m -> m.getId().equals("popularity")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue());
|
|
||||||
|
|
||||||
tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,253 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bypassactionset;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.PropagationConstant;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.fos.SparkUpdateFOS;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
import org.apache.commons.io.FileUtils;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.LocalFileSystem;
|
|
||||||
import org.apache.neethi.Assertion;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.junit.jupiter.api.AfterAll;
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.fos.GetFOSData;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.fos.PrepareFOSSparkJob;
|
|
||||||
import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel;
|
|
||||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
|
||||||
import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
|
||||||
|
|
||||||
public class FOSTest {
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(FOSTest.class);
|
|
||||||
|
|
||||||
private static Path workingDir;
|
|
||||||
private static SparkSession spark;
|
|
||||||
private static LocalFileSystem fs;
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
private static final String ID_PREFIX = "50|doi_________";
|
|
||||||
|
|
||||||
@BeforeAll
|
|
||||||
public static void beforeAll() throws IOException {
|
|
||||||
workingDir = Files.createTempDirectory(CountryPropagationJobTest.class.getSimpleName());
|
|
||||||
|
|
||||||
fs = FileSystem.getLocal(new Configuration());
|
|
||||||
log.info("using work dir {}", workingDir);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
conf.setAppName(FOSTest.class.getSimpleName());
|
|
||||||
|
|
||||||
conf.setMaster("local[*]");
|
|
||||||
conf.set("spark.driver.host", "localhost");
|
|
||||||
conf.set("hive.metastore.local", "true");
|
|
||||||
conf.set("spark.ui.enabled", "false");
|
|
||||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
|
||||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
|
||||||
|
|
||||||
spark = SparkSession
|
|
||||||
.builder()
|
|
||||||
.appName(CountryPropagationJobTest.class.getSimpleName())
|
|
||||||
.config(conf)
|
|
||||||
.getOrCreate();
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterAll
|
|
||||||
public static void afterAll() throws IOException {
|
|
||||||
FileUtils.deleteDirectory(workingDir.toFile());
|
|
||||||
spark.stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void getFOSFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
|
||||||
|
|
||||||
final String sourcePath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv")
|
|
||||||
.getPath();
|
|
||||||
final String outputPath = workingDir.toString() + "/fos.json";
|
|
||||||
|
|
||||||
new GetFOSData()
|
|
||||||
.doRewrite(sourcePath, outputPath, "eu.dnetlib.dhp.bypassactionset.FOSDataModel", '\t', fs);
|
|
||||||
|
|
||||||
BufferedReader in = new BufferedReader(
|
|
||||||
new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath))));
|
|
||||||
|
|
||||||
String line;
|
|
||||||
int count = 0;
|
|
||||||
while ((line = in.readLine()) != null) {
|
|
||||||
FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class);
|
|
||||||
|
|
||||||
System.out.println(new ObjectMapper().writeValueAsString(fos));
|
|
||||||
count += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
assertEquals(38, count);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void distributeDoiTest() throws Exception {
|
|
||||||
final String sourcePath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
PrepareFOSSparkJob
|
|
||||||
.main(
|
|
||||||
new String[] {
|
|
||||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
|
||||||
"--sourcePath", sourcePath,
|
|
||||||
|
|
||||||
"-outputPath", workingDir.toString() + "/distribute"
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<FOSDataModel> tmp = sc
|
|
||||||
.textFile(workingDir.toString() + "/distribute")
|
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class));
|
|
||||||
|
|
||||||
String doi1 = ID_PREFIX +
|
|
||||||
IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.3390/s18072310"));
|
|
||||||
|
|
||||||
assertEquals(50, tmp.count());
|
|
||||||
assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi1)).count());
|
|
||||||
assertEquals(
|
|
||||||
"engineering and technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel1());
|
|
||||||
assertEquals("nano-technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel2());
|
|
||||||
assertEquals(
|
|
||||||
"nanoscience & nanotechnology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel3());
|
|
||||||
|
|
||||||
String doi = ID_PREFIX +
|
|
||||||
IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/1365-2656.12831"));
|
|
||||||
assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi)).count());
|
|
||||||
assertEquals("social sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel1());
|
|
||||||
assertEquals(
|
|
||||||
"psychology and cognitive sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel2());
|
|
||||||
assertEquals("NULL", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel3());
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void updateResult() throws Exception{
|
|
||||||
|
|
||||||
final String fosPath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
final String inputPath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
SparkUpdateFOS
|
|
||||||
.main(
|
|
||||||
new String[] {
|
|
||||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
|
||||||
"--fosPath", fosPath,
|
|
||||||
"--inputPath", inputPath,
|
|
||||||
"--outputPath", workingDir.toString() + "/publication",
|
|
||||||
"--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication"
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<Publication> tmp = sc
|
|
||||||
.textFile(workingDir.toString() + "/publication")
|
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
|
|
||||||
|
|
||||||
Assertions.assertEquals(6, tmp.count());
|
|
||||||
|
|
||||||
tmp.filter(r -> r.getSubject() != null).map(p -> p.getSubject())
|
|
||||||
.foreach(s -> s.stream().forEach(sbj -> Assertions.assertFalse("FOS".equals(sbj.getQualifier().getClassid()))));
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void updateResultMatch() throws Exception{
|
|
||||||
final String fosPath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
final String inputPath = getClass()
|
|
||||||
.getResource("/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json")
|
|
||||||
.getPath();
|
|
||||||
|
|
||||||
SparkUpdateFOS
|
|
||||||
.main(
|
|
||||||
new String[] {
|
|
||||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
|
||||||
"--fosPath", fosPath,
|
|
||||||
"--inputPath", inputPath,
|
|
||||||
"--outputPath", workingDir.toString() + "/publication",
|
|
||||||
"--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication"
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<Publication> tmp = sc
|
|
||||||
.textFile(workingDir.toString() + "/publication")
|
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
|
|
||||||
|
|
||||||
Assertions.assertEquals(6, tmp.count());
|
|
||||||
|
|
||||||
Assertions.assertEquals(3, tmp.filter(r -> r.getSubject() != null).map(p -> p.getSubject()).flatMap(v -> v.iterator())
|
|
||||||
.filter(sbj -> sbj.getQualifier().getClassid().equals("FOS")).collect().size());
|
|
||||||
|
|
||||||
|
|
||||||
List<StructuredProperty> sbjs = tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f"))
|
|
||||||
.map(p -> p.getSubject()).collect().get(0);
|
|
||||||
|
|
||||||
Assertions.assertEquals(12, sbjs.size());
|
|
||||||
|
|
||||||
Stream<StructuredProperty> fosSubjs = sbjs.stream().filter(sbj -> sbj.getQualifier().getClassid().equals("FOS"));
|
|
||||||
|
|
||||||
Assertions.assertTrue(fosSubjs
|
|
||||||
.map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("engineering and technology"));
|
|
||||||
Assertions.assertTrue(fosSubjs
|
|
||||||
.map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("nano-technology"));
|
|
||||||
Assertions.assertTrue(fosSubjs
|
|
||||||
.map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("nanoscience & nanotechnology"));
|
|
||||||
|
|
||||||
fosSubjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()) );
|
|
||||||
fosSubjs.forEach(sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid()) );
|
|
||||||
fosSubjs.forEach(sbj -> Assertions.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname() ));
|
|
||||||
fosSubjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust() ));
|
|
||||||
fosSubjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference() ));
|
|
||||||
fosSubjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible() ));
|
|
||||||
fosSubjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred() ));
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,86 +0,0 @@
|
||||||
{"id":"50|doi_________63848be3afd635374828253a6f974f11","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
||||||
{"id":"50|doi_________5da9060b89165e3f61a0806dcf2c2696","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________9ea0266b8ddd471eddb20635b89273bb","scoreList":[{"id":"influence","unit":[{"key":"score","value":"7.5597134689e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"4.903880192"}]},{"id":"popularity","unit":[{"key":"score","value":"1.17977512835e-08"}]}]}
|
|
||||||
{"id":"50|doi_________ab797495de07d4f4a25f08b84fca6021","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.34596412687e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.641151896994"}]},{"id":"popularity","unit":[{"key":"score","value":"2.33375102921e-09"}]}]}
|
|
||||||
{"id":"50|doi_________df5ade937e177dcfb82bc5ec3139fefe","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]}
|
|
||||||
{"id":"50|doi_________357976ea7d4e744fe21966607334952c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]}
|
|
||||||
{"id":"50|doi_________872982548f741b89eb5d30f509316dde","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.32078461509e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"1.6"}]},{"id":"popularity","unit":[{"key":"score","value":"8.3168486939e-09"}]}]}
|
|
||||||
{"id":"50|doi_________19c6da2771befb83f9f2715fc84f9edf","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.96492048955e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"1.0"}]},{"id":"popularity","unit":[{"key":"score","value":"1.12641925838e-08"}]}]}
|
|
||||||
{"id":"50|doi_________46e18e9e477d6fc71e002eae47cfc390","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________5cc344a6da53a8f2bac11faf7607e0b0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.76260934675e-10"}]}]}
|
|
||||||
{"id":"50|doi_________fdfadf5cefdb8b63f90d5a3475a95959","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]}
|
|
||||||
{"id":"50|doi_________51fc7a9c7b9f1cf6705c7afb1de58967","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.93311506443e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.002176782336"}]},{"id":"popularity","unit":[{"key":"score","value":"1.7668105708e-09"}]}]}
|
|
||||||
{"id":"50|doi_________bbf7a94696ab67e4c29429522c31c0ef","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]}
|
|
||||||
{"id":"50|doi_________232498b3aed64dd0f0db87b09b61d5cd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.26777280882e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.406656"}]},{"id":"popularity","unit":[{"key":"score","value":"3.39745193285e-09"}]}]}
|
|
||||||
{"id":"50|doi_________03749bed57efa24ab607527cf1eb94c5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________65074998446928c1c18e25313dcf974b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________2b62feaed6ad14760096c2a59f82836e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________d2d133a6fdfd44f34f96e225b5573447","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________3dd3cccbfcad3f206d1239a580af011f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________84852a20e9bbf1daf89803b5fcfc9c34","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]}
|
|
||||||
{"id":"50|doi_________84c5824b3a10a28a8bc26fed6223a08a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________8658fda7574016daa0d71c88eb6bab5f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]}
|
|
||||||
{"id":"50|doi_________ab52c136b88151f0c28f6ca4a5ef2a71","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________e8a2912fe3d70b13124e436294411378","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________cf3470c86f338ccf232f2869d0fa6ea2","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________16437c0064576207ba1438bf07fb9e21","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________f0ae1dc0f1dbb7fceaccc02d6e667f24","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.40470414877e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.6"}]},{"id":"popularity","unit":[{"key":"score","value":"7.89465099068e-09"}]}]}
|
|
||||||
{"id":"50|doi_________93d03d99bbc0f542fa2679e74882a096","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]}
|
|
||||||
{"id":"50|doi_________fcfce4bc985f22baf2dc1dc733a862e7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________3b7fd390517f45b0d37e0ec0adbb98b1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]}
|
|
||||||
{"id":"50|doi_________31306ba727c26ba04c0d4bbd0899d433","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________3e6151aa77865a1fdae56392f68b81f1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________1cd6e2c4cf189819fa8a011629b5744c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________45c855d81df747ff3c2033f6c8cf7bca","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________24ac4c0c4b143e37c2551723e9a55f36","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________5f9e3f1a076c2f587165f8ccd68286e5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________b637731c913efe3c1f283183ef4a59cc","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]}
|
|
||||||
{"id":"50|doi_________490c0c23e4ed269f807e72e71cad09ae","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________15584e5a3a5bbdc487c85ab18d8a7c22","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________a7fd268447553e7e0fe06c19db28ea85","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________cf1b8db4480aa0e281a4cc122d3b2416","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________8880fd8fa9cafbde2431583dfbab1e1f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________22f8b4f61c49de05dcc0d94bf5e147d8","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________96e2b076fab4931ec2b1b9f43c9c31f9","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________1ac665f912877f93f9084dc38bc8174b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________394fd70c30b21b1336b5a0405496fbe0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________73caf44494091b089f8be65427fb4341","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________9b996963325dfae10dba5c3043938fc1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________1f03d5cab86f4963fbd3e0f1284cb9b8","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________87a59f3918f9ac2c394b2509173fb3e1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________9a8465f5343a1c845b6dd91496395c7e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________b03c0a657c669d24f45844307b9ad6bd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________04aeabd709d8a486bde733ffc5ef53ae","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]}
|
|
||||||
{"id":"50|doi_________6c21d2d2133477141ad74226f8fd75b0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]}
|
|
||||||
{"id":"50|doi_________b746ffb04cd9f815d5880c827c55040e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________0457b2e62a69f0e104db7cc575f2241a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________2fa691f5394020f7cb2df4f272a062e7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________aa2a77dc605567f5f2305b5f38a1a4b7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________cdb6f10588d85495208cbc6bf4a75000","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________6630766c2ad4783ebd01a9d7e9607eb1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________389315d5f74765688d34b8ee86c2514f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________beef7316dd6665798120698451446d74","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________525a46f117c946b8ae1caea542e24d0c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________fe3d2250ebaa65dae09aa372945c917f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________72d2ec1744cda7d49cf35707ff3e11c1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________d7e0c5fd1f00d8a90879f168d73b9e0b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________6e0e19ee1f9c1dbadf202ccd9919e6a7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________6b306286f9b4d207f81bc9164713243c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________acf5c0c1a125cd78fd9193589edc9152","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________66f645f86cdd9592953a1106e0dbf191","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________77ff842449308dd907ec46ebf9ccd539","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________a3d8fc73d1ac1308c117a61b4eb14ab5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________9ff57cb17abfcd338c2ca768c85e452a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]}
|
|
||||||
{"id":"50|doi_________f903c5c3305fc7b1598a0ed989bb4f83","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]}
|
|
||||||
{"id":"50|doi_________2260d2cdf6fbcee10d61d5e0f1428ebd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________549c5c8f66c36cab609b7221eae37040","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________388a95b886946d771a7f38e86d58f65e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________4141ef3f730fc811930a66c088486c34","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
||||||
{"id":"50|doi_________e3b32800bba3187fbf7f470df08755ea","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]}
|
|
||||||
{"id":"50|doi_________d8d02aa2c2347ecc628724265c5cae27","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
||||||
{"id":"50|doi_________08f1db95b06e62e34948f2b47ac449af","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
||||||
{"id":"50|doi_________cbbab3f7306b0573664b94615f5c04e2","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
||||||
{"id":"50|doi_________b24ab3e127aa67e2a1017292988d571f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
||||||
{"id":"50|doi_________eb9a4f92bd6934727e2722d22340884a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
||||||
{"id":"50|doi_________727484ef97b31ecb0806cc959869aa97","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
||||||
{"id":"50|doi_________224c865ef04d10c117913fa08eeb1be4","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]}
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,50 +0,0 @@
|
||||||
{"doi":"50|doi_________b24ab3e127aa67e2a1017292988d571f","level1":"engineering and technology","level2":"nano-technology","level3":"nanoscience & nanotechnology"}
|
|
||||||
{"doi":"50|doi_________f648499be6ba8e83226834167f22a7cb","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________439b0885db30c66ecf62a2a6a4451116","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________c22f065c9ab34fee87a3952fb79f5ee6","level1":"natural sciences","level2":"NULL","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________9ea3d8140aaa8add5e929be92f0dab13","level1":"natural sciences","level2":"NULL","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________12f446645a131ab55fc5dabf6692da31","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"}
|
|
||||||
{"doi":"50|doi_________550a40c9b57fcc974c127ad69dd60df2","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"environmental sciences"}
|
|
||||||
{"doi":"50|doi_________6b95eee8d0eff26b7cca8e960968ad62","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"}
|
|
||||||
{"doi":"50|doi_________5ff24d570b3984ddcf04f58b771ad635","level1":"natural sciences","level2":"physical sciences","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________b909cd0953065bc611bac1d89e96bf6c","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"}
|
|
||||||
{"doi":"50|doi_________d56d9dc21f317b3e009d5b6c8ea87212","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"}
|
|
||||||
{"doi":"50|doi_________3ea4d5309ce7934ed281342dd1f70867","level1":"natural sciences","level2":"NULL","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________f23029f6f070b4b2e1c852ce7f9f5f32","level1":"medical and health sciences","level2":"other medical science","level3":"health policy & services"}
|
|
||||||
{"doi":"50|doi_________2ea2433fb314647e72cab828dd529f00","level1":"natural sciences","level2":"biological sciences","level3":"plant biology & botany"}
|
|
||||||
{"doi":"50|doi_________ffe10c217b3a96f7584cf09fdad579e1","level1":"engineering and technology","level2":"NULL","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________45e682be5a57e2fabea2c02ba0752f1a","level1":"natural sciences","level2":"NULL","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________312f5db945545c66e6f18c62faf6290c","level1":"medical and health sciences","level2":"health sciences","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________d53bd3a4e48921415e554a4edc91d6db","level1":"natural sciences","level2":"physical sciences","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________2819592482582ff33363069d116abd64","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"}
|
|
||||||
{"doi":"50|doi_________e41a47550fed93904e443123b752bc2b","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"}
|
|
||||||
{"doi":"50|doi_________43e8bf6e95cd3043f510771cf0b0c984","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"}
|
|
||||||
{"doi":"50|doi_________000c1dc14e99b89fc52976533338fe4c","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"}
|
|
||||||
{"doi":"50|doi_________5cf55e49aebd633f1326d28451d1f6e5","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"computer hardware & architecture"}
|
|
||||||
{"doi":"50|doi_________5b79bd7bd9f87361b4a4abc3cbb2df75","level1":"natural sciences","level2":"mathematics","level3":"numerical & computational mathematics"}
|
|
||||||
{"doi":"50|doi_________51a7b4738d332570beb98be13e95d369","level1":"natural sciences","level2":"chemical sciences","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________6a9271220296585204ff81d651215d63","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________a2b8554df106bb29a0035e2ea56046a2","level1":"medical and health sciences","level2":"health sciences","level3":"biochemistry & molecular biology"}
|
|
||||||
{"doi":"50|doi_________fa2b92d5140f67a6c69c970a9e8d2cf0","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"}
|
|
||||||
{"doi":"50|doi_________b5f6c78a31abbb806e1e375a73231dd1","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"}
|
|
||||||
{"doi":"50|doi_________358777f48b491554cdee63c4dcabe6aa","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"}
|
|
||||||
{"doi":"50|doi_________809f282159a9f46a41f92b6fc8d1095f","level1":"natural sciences","level2":"biological sciences","level3":"marine biology & hydrobiology"}
|
|
||||||
{"doi":"50|doi_________94f1b0b1509700d4370cbe7571000bfc","level1":"engineering and technology","level2":"industrial biotechnology","level3":"industrial engineering & automation"}
|
|
||||||
{"doi":"50|doi_________33390d9d5163da63f73acd173ef704e0","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________3149f0a909641720480f62faf1f886b9","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________1e29deb6473bdb2c84e5966fc4c557bb","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________effab60be43d653f2cccf1cf6de461df","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"}
|
|
||||||
{"doi":"50|doi_________14a6ce8c28da6e82412720f1330e826d","level1":"natural sciences","level2":"biological sciences","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________f59abf7f96244398a465b6e9a7375311","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________b1b7b5c7251fa1901583ec4840e2d3ec","level1":"medical and health sciences","level2":"basic medicine","level3":"biochemistry & molecular biology"}
|
|
||||||
{"doi":"50|doi_________e024d1b738df3b24bc58fa0228542571","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"}
|
|
||||||
{"doi":"50|doi_________0e03d3592dca1baedfc74f1fbe0b9f22","level1":"natural sciences","level2":"NULL","level3":"NULL"}
|
|
||||||
{"doi":"50|doi_________0e1777e31f32984e5a261205be28da69","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"}
|
|
||||||
{"doi":"50|doi_________7b464a5eb02a959aeedc7b051fe0f89f","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"}
|
|
||||||
{"doi":"50|doi_________32fed3ec7c9ac157b73e050ff1c158d3","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"}
|
|
||||||
{"doi":"50|doi_________8302f548fe724bc58e53e2bd329cb3c3","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"}
|
|
||||||
{"doi":"50|doi_________44fbef7e57e4123ef44f10bb073f9ef8","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"}
|
|
||||||
{"doi":"50|doi_________c8e7d24b1649024e85bcf9a4c520a65b","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"}
|
|
||||||
{"doi":"50|doi_________877a3c4a72d2a6b4aa60a2da016237df","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"}
|
|
||||||
{"doi":"50|doi_________7af7cda00a082fa8624493d74357d3f7","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"}
|
|
||||||
{"doi":"50|doi_________20201cc71fd003dae0d58016dd4ef4af","level1":"agricultural and veterinary sciences","level2":"agriculture, forestry, and fisheries","level3":"agronomy & agriculture"}
|
|
File diff suppressed because one or more lines are too long
|
@ -331,7 +331,6 @@ public class DumpJobTest {
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode());
|
Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode());
|
||||||
Assertions.assertEquals(null, gr.getBestaccessright().getOpenAccessRoute());
|
|
||||||
|
|
||||||
Assertions.assertEquals("One Ecosystem", gr.getContainer().getName());
|
Assertions.assertEquals("One Ecosystem", gr.getContainer().getName());
|
||||||
Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline());
|
Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline());
|
||||||
|
|
Loading…
Reference in New Issue