forked from D-Net/dnet-hadoop
attributes fixes
This commit is contained in:
parent
bde59a7c8f
commit
6af3fd16b6
|
@ -1,10 +1,5 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import com.wcohen.ss.AbstractStringDistance;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
import org.joda.time.DateTime;
|
||||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import java.time.DateTimeException;
|
||||
import java.time.LocalDate;
|
||||
|
@ -13,55 +8,62 @@ import java.time.format.DateTimeFormatter;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import com.wcohen.ss.AbstractStringDistance;
|
||||
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
|
||||
@ComparatorClass("dateRange")
|
||||
public class DateRange extends AbstractStringComparator {
|
||||
|
||||
int YEAR_RANGE;
|
||||
int YEAR_RANGE;
|
||||
|
||||
public DateRange(Map<String, String> params) {
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3"));
|
||||
}
|
||||
public DateRange(Map<String, String> params) {
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3"));
|
||||
}
|
||||
|
||||
public DateRange(final double weight) {
|
||||
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
public DateRange(final double weight) {
|
||||
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
protected DateRange(final double weight, final AbstractStringDistance ssalgo) {
|
||||
super(weight, ssalgo);
|
||||
}
|
||||
protected DateRange(final double weight, final AbstractStringDistance ssalgo) {
|
||||
super(weight, ssalgo);
|
||||
}
|
||||
|
||||
/**
 * Tells whether {@code str} is made up exclusively of digit characters.
 *
 * @param str the text to test; a {@code null} value makes the {@code matches} call throw
 *            {@link NullPointerException}
 * @return {@code true} when the whole string is one or more digits, {@code false} otherwise
 *         (including for the empty string)
 */
public static boolean isNumeric(String str) {
|
||||
return str.matches("\\d+"); // whole string must be one or more digits; a sign or a decimal point is rejected
|
||||
}
|
||||
/**
 * Tells whether {@code str} is made up exclusively of digit characters.
 *
 * @param str the text to test; a {@code null} value makes the {@code matches} call throw
 *            {@link NullPointerException}
 * @return {@code true} when the whole string is one or more digits, {@code false} otherwise
 *         (including for the empty string)
 */
public static boolean isNumeric(String str) {
|
||||
return str.matches("\\d+"); // whole string must be one or more digits; a sign or a decimal point is rejected
|
||||
}
|
||||
|
||||
/**
 * Compares two dates given as {@code yyyy-MM-dd} strings and reports whether the years
 * component of the period between them is at most {@code YEAR_RANGE}.
 *
 * @param a    first date string
 * @param b    second date string
 * @param conf not read by this method
 * @return {@code -1.0} when either string is empty or cannot be parsed with the
 *         {@code yyyy-MM-dd} pattern, {@code 1.0} when {@code Period.between(a, b).getYears()}
 *         is less than or equal to {@code YEAR_RANGE}, {@code 0.0} otherwise
 */
@Override
|
||||
public double distance(final String a, final String b, final Config conf) {
|
||||
if (a.isEmpty() || b.isEmpty()) {
|
||||
return -1.0; // return -1 if a field is missing
|
||||
}
|
||||
@Override
|
||||
public double distance(final String a, final String b, final Config conf) {
|
||||
if (a.isEmpty() || b.isEmpty()) {
|
||||
return -1.0; // return -1 if a field is missing
|
||||
}
|
||||
|
||||
try {
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH);
|
||||
LocalDate d1 = LocalDate.parse(a, formatter);
|
||||
LocalDate d2 = LocalDate.parse(b, formatter);
|
||||
Period period = Period.between(d1, d2);
|
||||
try {
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH);
|
||||
LocalDate d1 = LocalDate.parse(a, formatter);
|
||||
LocalDate d2 = LocalDate.parse(b, formatter);
|
||||
Period period = Period.between(d1, d2);
|
||||
|
||||
// NOTE(review): Period.between is signed, so when b is earlier than a getYears() is zero or
|
||||
// negative and this test is always true, however far apart the dates are — confirm whether
|
||||
// an absolute difference was intended.
return period.getYears() <= YEAR_RANGE? 1.0 : 0.0;
|
||||
}
|
||||
catch (DateTimeException e) {
|
||||
return -1.0;
|
||||
}
|
||||
// NOTE(review): Period.between is signed, so when b is earlier than a getYears() is zero or
|
||||
// negative and this test is always true, however far apart the dates are — confirm whether
|
||||
// an absolute difference was intended.
// NOTE(review): YEAR_RANGE is assigned only by the Map-based constructor; the constructors
// taking a weight leave the field at its default value — confirm that is acceptable.
return period.getYears() <= YEAR_RANGE ? 1.0 : 0.0;
|
||||
} catch (DateTimeException e) {
|
||||
// unparsable input is reported the same way as a missing field
return -1.0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getWeight() {
|
||||
return super.weight;
|
||||
}
|
||||
@Override
|
||||
public double getWeight() {
|
||||
return super.weight;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double normalize(final double d) {
|
||||
return d;
|
||||
}
|
||||
@Override
|
||||
protected double normalize(final double d) {
|
||||
return d;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ public class JsonListMatch extends AbstractListComparator {
|
|||
|
||||
Set<String> types = Sets.intersection(typesA, typesB);
|
||||
|
||||
if (types.isEmpty()) // if no common type, it is impossible to compare
|
||||
if (types.isEmpty()) // if no common type, it is impossible to compare
|
||||
return -1;
|
||||
|
||||
ca = ca.stream().filter(s -> types.contains(s.split("::")[0])).collect(Collectors.toSet());
|
||||
|
|
|
@ -72,14 +72,34 @@ public class ComparatorTest extends AbstractPaceTest {
|
|||
CodeMatch codeMatch = new CodeMatch(params);
|
||||
|
||||
// names have different codes
|
||||
assertEquals(0.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ir02", conf));
|
||||
assertEquals(
|
||||
0.0,
|
||||
codeMatch
|
||||
.distance(
|
||||
"physical oceanography at ctd station june 1998 ev02a",
|
||||
"physical oceanography at ctd station june 1998 ir02", conf));
|
||||
|
||||
// names have same code
|
||||
assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ev02a", conf));
|
||||
assertEquals(
|
||||
1.0,
|
||||
codeMatch
|
||||
.distance(
|
||||
"physical oceanography at ctd station june 1998 ev02a",
|
||||
"physical oceanography at ctd station june 1998 ev02a", conf));
|
||||
|
||||
// code is not in both names
|
||||
assertEquals(-1, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998 ev02a", conf));
|
||||
assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998", conf));
|
||||
assertEquals(
|
||||
-1,
|
||||
codeMatch
|
||||
.distance(
|
||||
"physical oceanography at ctd station june 1998",
|
||||
"physical oceanography at ctd station june 1998 ev02a", conf));
|
||||
assertEquals(
|
||||
1.0,
|
||||
codeMatch
|
||||
.distance(
|
||||
"physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998",
|
||||
conf));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -275,7 +295,7 @@ public class ComparatorTest extends AbstractPaceTest {
|
|||
Arrays
|
||||
.asList(
|
||||
"{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"grid\",\"classname\":\"GRID Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"grid_1\"}",
|
||||
"{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"),
|
||||
"{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"),
|
||||
"authors");
|
||||
List<String> b = createFieldList(
|
||||
Arrays
|
||||
|
|
|
@ -3,8 +3,6 @@ package eu.dnetlib.dhp.actionmanager;
|
|||
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
|
@ -15,6 +13,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.dhp.schema.oaf.Subject;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.raid;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.Constants;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_ID;
|
||||
import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_NAME;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||
|
@ -19,172 +19,191 @@ import org.apache.spark.sql.Encoders;
|
|||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.Constants;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_ID;
|
||||
import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_NAME;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||
|
||||
public class GenerateRAiDActionSetJob {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class);
|
||||
private static final Logger log = LoggerFactory
|
||||
.getLogger(eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final List<KeyValue> RAID_COLLECTED_FROM = listKeyValues(
|
||||
OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
|
||||
private static final List<KeyValue> RAID_COLLECTED_FROM = listKeyValues(
|
||||
OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
|
||||
|
||||
private static final Qualifier RAID_QUALIFIER = qualifier("raid:openaireinference", "raid:openaireinference", DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS);
|
||||
private static final Qualifier RAID_QUALIFIER = qualifier("0049", "Research Activity Identifier", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE);
|
||||
|
||||
private static final DataInfo RAID_DATA_INFO = dataInfo(
|
||||
false, OPENAIRE_DATASOURCE_NAME, true, false, RAID_QUALIFIER, "0.92");
|
||||
private static final Qualifier RAID_INFERENCE_QUALIFIER = qualifier(
|
||||
"raid:openaireinference", "Inferred by OpenAIRE", DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS);
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
private static final DataInfo RAID_DATA_INFO = dataInfo(
|
||||
false, OPENAIRE_DATASOURCE_NAME, true, false, RAID_INFERENCE_QUALIFIER, "0.92");
|
||||
|
||||
final String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json"));
|
||||
public static void main(final String[] args) throws Exception {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
final String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/raid/action_set_parameters.json"));
|
||||
|
||||
parser.parseArgument(args);
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
|
||||
final Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
parser.parseArgument(args);
|
||||
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
final Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
|
||||
final String inputPath = parser.get("inputPath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath {}: ", outputPath);
|
||||
final String inputPath = parser.get("inputPath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final SparkConf conf = new SparkConf();
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath {}: ", outputPath);
|
||||
|
||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
processRAiDEntities(spark, inputPath, outputPath);
|
||||
});
|
||||
}
|
||||
final SparkConf conf = new SparkConf();
|
||||
|
||||
private static void removeOutputDir(final SparkSession spark, final String path) {
|
||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||
}
|
||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
processRAiDEntities(spark, inputPath, outputPath);
|
||||
});
|
||||
}
|
||||
|
||||
static void processRAiDEntities(final SparkSession spark,
|
||||
final String inputPath,
|
||||
final String outputPath) {
|
||||
readInputPath(spark, inputPath)
|
||||
.map(GenerateRAiDActionSetJob::prepareRAiD)
|
||||
.flatMap(List::iterator)
|
||||
.mapToPair(
|
||||
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
||||
private static void removeOutputDir(final SparkSession spark, final String path) {
|
||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||
}
|
||||
|
||||
}
|
||||
static void processRAiDEntities(final SparkSession spark,
|
||||
final String inputPath,
|
||||
final String outputPath) {
|
||||
readInputPath(spark, inputPath)
|
||||
.map(GenerateRAiDActionSetJob::prepareRAiD)
|
||||
.flatMap(List::iterator)
|
||||
.mapToPair(
|
||||
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
||||
|
||||
protected static List<AtomicAction<? extends Oaf>> prepareRAiD(final RAiDEntity r) {
|
||||
}
|
||||
|
||||
final Date now = new Date();
|
||||
final OtherResearchProduct orp = new OtherResearchProduct();
|
||||
final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
|
||||
String raidId = calculateOpenaireId(r.getRaid());
|
||||
protected static List<AtomicAction<? extends Oaf>> prepareRAiD(final RAiDEntity r) {
|
||||
|
||||
orp.setId(raidId);
|
||||
orp.setCollectedfrom(RAID_COLLECTED_FROM);
|
||||
orp.setDataInfo(RAID_DATA_INFO);
|
||||
orp.setResourcetype(RAID_QUALIFIER);
|
||||
orp.setTitle(
|
||||
Collections.singletonList(
|
||||
structuredProperty(
|
||||
r.getTitle(),
|
||||
qualifier("main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE),
|
||||
RAID_DATA_INFO))
|
||||
);
|
||||
orp.setDescription(listFields(RAID_DATA_INFO, r.getSummary()));
|
||||
orp.setAuthor(createAuthors(r.getAuthors()));
|
||||
orp.setInstance(Collections.singletonList(eu.dnetlib.dhp.actionmanager.Constants.getInstance(RAID_QUALIFIER)));
|
||||
orp.setSubject(
|
||||
r.getSubjects()
|
||||
.stream()
|
||||
.map(s -> subject(s, qualifier(DNET_SUBJECT_KEYWORD, DNET_SUBJECT_KEYWORD, DNET_SUBJECT_TYPOLOGIES, DNET_SUBJECT_TYPOLOGIES), RAID_DATA_INFO))
|
||||
.collect(Collectors.toList())
|
||||
);
|
||||
orp.setRelevantdate(
|
||||
Arrays.asList(
|
||||
structuredProperty(r.getEndDate(), qualifier("endDate","endDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO),
|
||||
structuredProperty(r.getStartDate(), qualifier("startDate", "startDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO)
|
||||
)
|
||||
);
|
||||
orp.setLastupdatetimestamp(now.getTime());
|
||||
orp.setDateofcollection(r.getStartDate());
|
||||
final Date now = new Date();
|
||||
final OtherResearchProduct orp = new OtherResearchProduct();
|
||||
final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
|
||||
String raidId = calculateOpenaireId(r.getRaid());
|
||||
|
||||
res.add(new AtomicAction<>(OtherResearchProduct.class, orp));
|
||||
orp.setId(raidId);
|
||||
orp.setCollectedfrom(RAID_COLLECTED_FROM);
|
||||
orp.setDataInfo(RAID_DATA_INFO);
|
||||
orp
|
||||
.setTitle(
|
||||
Collections
|
||||
.singletonList(
|
||||
structuredProperty(
|
||||
r.getTitle(),
|
||||
qualifier("main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE),
|
||||
RAID_DATA_INFO)));
|
||||
orp.setDescription(listFields(RAID_DATA_INFO, r.getSummary()));
|
||||
// orp.setAuthor(createAuthors(r.getAuthors()));
|
||||
orp.setInstance(Collections.singletonList(eu.dnetlib.dhp.actionmanager.Constants.getInstance(RAID_QUALIFIER)));
|
||||
orp
|
||||
.setSubject(
|
||||
r
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.map(
|
||||
s -> subject(
|
||||
s,
|
||||
qualifier(
|
||||
DNET_SUBJECT_KEYWORD, DNET_SUBJECT_KEYWORD, DNET_SUBJECT_TYPOLOGIES,
|
||||
DNET_SUBJECT_TYPOLOGIES),
|
||||
RAID_DATA_INFO))
|
||||
.collect(Collectors.toList()));
|
||||
orp
|
||||
.setRelevantdate(
|
||||
Arrays
|
||||
.asList(
|
||||
structuredProperty(
|
||||
r.getEndDate(), qualifier("endDate", "endDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE),
|
||||
RAID_DATA_INFO),
|
||||
structuredProperty(
|
||||
r.getStartDate(),
|
||||
qualifier("startDate", "startDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE),
|
||||
RAID_DATA_INFO)));
|
||||
orp.setLastupdatetimestamp(now.getTime());
|
||||
orp.setDateofacceptance(field(r.getStartDate(), RAID_DATA_INFO));
|
||||
|
||||
for(String resultId: r.getIds()) {
|
||||
Relation rel1 = OafMapperUtils.getRelation(
|
||||
raidId,
|
||||
resultId,
|
||||
ModelConstants.RESULT_RESULT,
|
||||
ModelConstants.OUTCOME,
|
||||
PART,
|
||||
RAID_COLLECTED_FROM,
|
||||
RAID_DATA_INFO,
|
||||
now.getTime(),
|
||||
null,
|
||||
null
|
||||
);
|
||||
Relation rel2 = OafMapperUtils.getRelation(
|
||||
resultId,
|
||||
raidId,
|
||||
ModelConstants.RESULT_RESULT,
|
||||
ModelConstants.OUTCOME,
|
||||
IS_PART_OF,
|
||||
RAID_COLLECTED_FROM,
|
||||
RAID_DATA_INFO,
|
||||
now.getTime(),
|
||||
null,
|
||||
null
|
||||
);
|
||||
res.add(new AtomicAction<>(Relation.class, rel1));
|
||||
res.add(new AtomicAction<>(Relation.class, rel2));
|
||||
}
|
||||
res.add(new AtomicAction<>(OtherResearchProduct.class, orp));
|
||||
|
||||
return res;
|
||||
}
|
||||
for (String resultId : r.getIds()) {
|
||||
Relation rel1 = OafMapperUtils
|
||||
.getRelation(
|
||||
raidId,
|
||||
resultId,
|
||||
ModelConstants.RESULT_RESULT,
|
||||
PART,
|
||||
HAS_PART,
|
||||
RAID_COLLECTED_FROM,
|
||||
RAID_DATA_INFO,
|
||||
now.getTime(),
|
||||
null,
|
||||
null);
|
||||
Relation rel2 = OafMapperUtils
|
||||
.getRelation(
|
||||
resultId,
|
||||
raidId,
|
||||
ModelConstants.RESULT_RESULT,
|
||||
PART,
|
||||
IS_PART_OF,
|
||||
RAID_COLLECTED_FROM,
|
||||
RAID_DATA_INFO,
|
||||
now.getTime(),
|
||||
null,
|
||||
null);
|
||||
res.add(new AtomicAction<>(Relation.class, rel1));
|
||||
res.add(new AtomicAction<>(Relation.class, rel2));
|
||||
}
|
||||
|
||||
public static String calculateOpenaireId(final String raid) {
|
||||
return String.format("50|%s::%s", Constants.RAID_NS_PREFIX, DHPUtils.md5(raid));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public static List<Author> createAuthors(final List<String> author) {
|
||||
return author.stream().map(s-> {
|
||||
Author a = new Author();
|
||||
a.setFullname(s);
|
||||
return a;
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
public static String calculateOpenaireId(final String raid) {
|
||||
return String.format("50|%s::%s", Constants.RAID_NS_PREFIX, DHPUtils.md5(raid));
|
||||
}
|
||||
|
||||
private static JavaRDD<RAiDEntity> readInputPath(
|
||||
final SparkSession spark,
|
||||
final String path) {
|
||||
public static List<Author> createAuthors(final List<String> author) {
|
||||
return author.stream().map(s -> {
|
||||
Author a = new Author();
|
||||
a.setFullname(s);
|
||||
return a;
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
return spark
|
||||
.read()
|
||||
.json(path)
|
||||
.as(Encoders.bean(RAiDEntity.class))
|
||||
.toJavaRDD();
|
||||
private static JavaRDD<RAiDEntity> readInputPath(
|
||||
final SparkSession spark,
|
||||
final String path) {
|
||||
|
||||
}
|
||||
return spark
|
||||
.read()
|
||||
.json(path)
|
||||
.as(Encoders.bean(RAiDEntity.class))
|
||||
.toJavaRDD();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,2 +1,5 @@
|
|||
package eu.dnetlib.dhp.actionmanager.raid.model;public class GenerateRAiDActionSetJob {
|
||||
|
||||
package eu.dnetlib.dhp.actionmanager.raid.model;
|
||||
|
||||
public class GenerateRAiDActionSetJob {
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.raid.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -5,98 +6,101 @@ import java.util.List;
|
|||
|
||||
public class RAiDEntity implements Serializable {
|
||||
|
||||
String raid;
|
||||
List<String> authors;
|
||||
String startDate;
|
||||
String endDate;
|
||||
List<String> subjects;
|
||||
List<String> titles;
|
||||
List<String> ids;
|
||||
String title;
|
||||
String summary;
|
||||
String raid;
|
||||
List<String> authors;
|
||||
String startDate;
|
||||
String endDate;
|
||||
List<String> subjects;
|
||||
List<String> titles;
|
||||
List<String> ids;
|
||||
String title;
|
||||
String summary;
|
||||
|
||||
public RAiDEntity(){}
|
||||
public RAiDEntity(String raid, List<String> authors, String startDate, String endDate, List<String> subjects, List<String> titles, List<String> ids, String title, String summary) {
|
||||
this.raid = raid;
|
||||
this.authors = authors;
|
||||
this.startDate = startDate;
|
||||
this.endDate = endDate;
|
||||
this.subjects = subjects;
|
||||
this.titles = titles;
|
||||
this.ids = ids;
|
||||
this.title = title;
|
||||
this.summary = summary;
|
||||
}
|
||||
public RAiDEntity() {
|
||||
}
|
||||
|
||||
public String getRaid() {
|
||||
return raid;
|
||||
}
|
||||
public RAiDEntity(String raid, List<String> authors, String startDate, String endDate, List<String> subjects,
|
||||
List<String> titles, List<String> ids, String title, String summary) {
|
||||
this.raid = raid;
|
||||
this.authors = authors;
|
||||
this.startDate = startDate;
|
||||
this.endDate = endDate;
|
||||
this.subjects = subjects;
|
||||
this.titles = titles;
|
||||
this.ids = ids;
|
||||
this.title = title;
|
||||
this.summary = summary;
|
||||
}
|
||||
|
||||
public void setRaid(String raid) {
|
||||
this.raid = raid;
|
||||
}
|
||||
public String getRaid() {
|
||||
return raid;
|
||||
}
|
||||
|
||||
public List<String> getAuthors() {
|
||||
return authors;
|
||||
}
|
||||
public void setRaid(String raid) {
|
||||
this.raid = raid;
|
||||
}
|
||||
|
||||
public void setAuthors(List<String> authors) {
|
||||
this.authors = authors;
|
||||
}
|
||||
public List<String> getAuthors() {
|
||||
return authors;
|
||||
}
|
||||
|
||||
public String getStartDate() {
|
||||
return startDate;
|
||||
}
|
||||
public void setAuthors(List<String> authors) {
|
||||
this.authors = authors;
|
||||
}
|
||||
|
||||
public void setStartDate(String startDate) {
|
||||
this.startDate = startDate;
|
||||
}
|
||||
public String getStartDate() {
|
||||
return startDate;
|
||||
}
|
||||
|
||||
public String getEndDate() {
|
||||
return endDate;
|
||||
}
|
||||
public void setStartDate(String startDate) {
|
||||
this.startDate = startDate;
|
||||
}
|
||||
|
||||
public void setEndDate(String endDate) {
|
||||
this.endDate = endDate;
|
||||
}
|
||||
public String getEndDate() {
|
||||
return endDate;
|
||||
}
|
||||
|
||||
public List<String> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
public void setEndDate(String endDate) {
|
||||
this.endDate = endDate;
|
||||
}
|
||||
|
||||
public void setSubjects(List<String> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
public List<String> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public List<String> getTitles() {
|
||||
return titles;
|
||||
}
|
||||
public void setSubjects(List<String> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
|
||||
public void setTitles(List<String> titles) {
|
||||
this.titles = titles;
|
||||
}
|
||||
public List<String> getTitles() {
|
||||
return titles;
|
||||
}
|
||||
|
||||
public List<String> getIds() {
|
||||
return ids;
|
||||
}
|
||||
public void setTitles(List<String> titles) {
|
||||
this.titles = titles;
|
||||
}
|
||||
|
||||
public void setIds(List<String> ids) {
|
||||
this.ids = ids;
|
||||
}
|
||||
public List<String> getIds() {
|
||||
return ids;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
public void setIds(List<String> ids) {
|
||||
this.ids = ids;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public String getSummary() {
|
||||
return summary;
|
||||
}
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public void setSummary(String summary) {
|
||||
this.summary = summary;
|
||||
}
|
||||
public String getSummary() {
|
||||
return summary;
|
||||
}
|
||||
|
||||
public void setSummary(String summary) {
|
||||
this.summary = summary;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.util.Optional;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -45,6 +44,7 @@ import eu.dnetlib.dhp.common.Constants;
|
|||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed;
|
||||
|
||||
/**
|
||||
|
@ -7,32 +8,33 @@ package eu.dnetlib.dhp.sx.bio.pubmed;
|
|||
*/
|
||||
public class PMAffiliation {
|
||||
|
||||
private String name;
|
||||
private String name;
|
||||
|
||||
private PMIdentifier identifier;
|
||||
private PMIdentifier identifier;
|
||||
|
||||
public PMAffiliation() {
|
||||
public PMAffiliation() {
|
||||
|
||||
}
|
||||
public PMAffiliation(String name, PMIdentifier identifier) {
|
||||
this.name = name;
|
||||
this.identifier = identifier;
|
||||
}
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
public PMAffiliation(String name, PMIdentifier identifier) {
|
||||
this.name = name;
|
||||
this.identifier = identifier;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public PMIdentifier getIdentifier() {
|
||||
return identifier;
|
||||
}
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public PMAffiliation setIdentifier(PMIdentifier identifier) {
|
||||
this.identifier = identifier;
|
||||
return this;
|
||||
}
|
||||
public PMIdentifier getIdentifier() {
|
||||
return identifier;
|
||||
}
|
||||
|
||||
public PMAffiliation setIdentifier(PMIdentifier identifier) {
|
||||
this.identifier = identifier;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,5 +97,4 @@ public class PMAuthor implements Serializable {
|
|||
this.affiliation = affiliation;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,53 +1,53 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed;
|
||||
|
||||
public class PMIdentifier {
|
||||
|
||||
private String pid;
|
||||
private String type;
|
||||
private String pid;
|
||||
private String type;
|
||||
|
||||
public PMIdentifier(String pid, String type) {
|
||||
this.pid = cleanPid(pid);
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public PMIdentifier(String pid, String type) {
|
||||
this.pid = cleanPid(pid);
|
||||
this.type = type;
|
||||
}
|
||||
public PMIdentifier() {
|
||||
|
||||
public PMIdentifier() {
|
||||
}
|
||||
|
||||
}
|
||||
/**
 * Normalises an identifier string, rewriting two ORCID spellings to the hyphenated form.
 * <ul>
 * <li>{@code null} is returned as {@code null};</li>
 * <li>a 16-character value of 15 digits followed by a digit or {@code X} is split into four
 * hyphen-separated groups of four characters;</li>
 * <li>a value of the form {@code http://orcid.org/} followed by four hyphen-separated groups of
 * four digits has the URL prefix removed;</li>
 * <li>anything else is returned unchanged.</li>
 * </ul>
 *
 * @param pid the raw identifier, possibly {@code null}
 * @return the normalised identifier, or the input itself when no rule applies
 */
private String cleanPid(String pid) {
|
||||
|
||||
private String cleanPid(String pid) {
|
||||
if (pid == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (pid == null) {
|
||||
return null;
|
||||
}
|
||||
// clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705
|
||||
if (pid.matches("[0-9]{15}[0-9X]")) {
|
||||
return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4");
|
||||
}
|
||||
|
||||
// clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705
|
||||
if (pid.matches("[0-9]{15}[0-9X]")) {
|
||||
return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4");
|
||||
}
|
||||
// clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543
|
||||
// NOTE(review): unlike the compact form above, this pattern requires the last group to be
|
||||
// all digits and the scheme to be http, so a URL ending in the checksum character X or
|
||||
// starting with https:// is returned unchanged — confirm whether that is intended.
if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) {
|
||||
return pid.replaceAll("http://orcid.org/", "");
|
||||
}
|
||||
return pid;
|
||||
}
|
||||
|
||||
// clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543
|
||||
// NOTE(review): unlike the compact form above, this pattern requires the last group to be
|
||||
// all digits and the scheme to be http, so a URL ending in the checksum character X or
|
||||
// starting with https:// is returned unchanged — confirm whether that is intended.
if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) {
|
||||
return pid.replaceAll("http://orcid.org/", "");
|
||||
}
|
||||
return pid;
|
||||
}
|
||||
public String getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public String getPid() {
|
||||
return pid;
|
||||
}
|
||||
public PMIdentifier setPid(String pid) {
|
||||
this.pid = cleanPid(pid);
|
||||
return this;
|
||||
}
|
||||
|
||||
public PMIdentifier setPid(String pid) {
|
||||
this.pid = cleanPid(pid);
|
||||
return this;
|
||||
}
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public PMIdentifier setType(String type) {
|
||||
this.type = type;
|
||||
return this;
|
||||
}
|
||||
public PMIdentifier setType(String type) {
|
||||
this.type = type;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,8 +20,6 @@
|
|||
<fs>
|
||||
<delete path='${raidActionSetPath}'/>
|
||||
<mkdir path='${raidActionSetPath}'/>
|
||||
<delete path='${workingDir}'/>
|
||||
<mkdir path='${workingDir}'/>
|
||||
</fs>
|
||||
<ok to="processRAiDFile"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -673,7 +673,6 @@ case object Crossref2Oaf {
|
|||
val doi = input.getString(0)
|
||||
val rorId = input.getString(1)
|
||||
|
||||
|
||||
val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true)
|
||||
val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId)
|
||||
|
||||
|
|
|
@ -82,21 +82,22 @@ class PMParser2 {
|
|||
a.setLastName((author \ "LastName").text)
|
||||
a.setForeName((author \ "ForeName").text)
|
||||
val id = (author \ "Identifier").text
|
||||
val idType =(author \ "Identifier" \ "@Source").text
|
||||
val idType = (author \ "Identifier" \ "@Source").text
|
||||
|
||||
if(id != null && id.nonEmpty && idType != null && idType.nonEmpty) {
|
||||
if (id != null && id.nonEmpty && idType != null && idType.nonEmpty) {
|
||||
a.setIdentifier(new PMIdentifier(id, idType))
|
||||
}
|
||||
|
||||
|
||||
val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text
|
||||
val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text
|
||||
val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text
|
||||
val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text
|
||||
|
||||
if(affiliation != null && affiliation.nonEmpty) {
|
||||
if (affiliation != null && affiliation.nonEmpty) {
|
||||
val aff = new PMAffiliation()
|
||||
aff.setName(affiliation)
|
||||
if(affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty) {
|
||||
if (
|
||||
affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty
|
||||
) {
|
||||
aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType))
|
||||
}
|
||||
a.setAffiliation(aff)
|
||||
|
|
|
@ -294,11 +294,23 @@ object PubMedToOaf {
|
|||
author.setName(a.getForeName)
|
||||
author.setSurname(a.getLastName)
|
||||
author.setFullname(a.getFullName)
|
||||
if(a.getIdentifier != null) {
|
||||
author.setPid(List(OafMapperUtils.structuredProperty(a.getIdentifier.getPid,
|
||||
OafMapperUtils.qualifier(a.getIdentifier.getType,a.getIdentifier.getType,ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES), dataInfo)).asJava)
|
||||
if (a.getIdentifier != null) {
|
||||
author.setPid(
|
||||
List(
|
||||
OafMapperUtils.structuredProperty(
|
||||
a.getIdentifier.getPid,
|
||||
OafMapperUtils.qualifier(
|
||||
a.getIdentifier.getType,
|
||||
a.getIdentifier.getType,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES
|
||||
),
|
||||
dataInfo
|
||||
)
|
||||
).asJava
|
||||
)
|
||||
}
|
||||
if (a.getAffiliation!= null)
|
||||
if (a.getAffiliation != null)
|
||||
author.setRawAffiliationString(List(a.getAffiliation.getName).asJava)
|
||||
author.setRank(index + 1)
|
||||
author
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.raid;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest;
|
||||
import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import static java.nio.file.Files.createTempDirectory;
|
||||
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.OBJECT_MAPPER;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
|
||||
|
@ -20,93 +25,141 @@ import org.junit.jupiter.api.AfterAll;
|
|||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest;
|
||||
import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.OBJECT_MAPPER;
|
||||
import static java.nio.file.Files.createTempDirectory;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class GenerateRAiDActionSetJobTest {
|
||||
private static String input_path;
|
||||
private static String output_path;
|
||||
static SparkSession spark;
|
||||
private static String input_path;
|
||||
private static String output_path;
|
||||
static SparkSession spark;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
|
||||
input_path = Paths
|
||||
.get(GenerateRAiDActionSetJobTest.class.getResource("/eu/dnetlib/dhp/actionmanager/raid/raid_example.json").toURI())
|
||||
.toFile()
|
||||
.getAbsolutePath();
|
||||
input_path = Paths
|
||||
.get(
|
||||
GenerateRAiDActionSetJobTest.class
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/raid/raid_example.json")
|
||||
.toURI())
|
||||
.toFile()
|
||||
.getAbsolutePath();
|
||||
|
||||
output_path = createTempDirectory(GenerateRAiDActionSetJobTest.class.getSimpleName() + "-")
|
||||
.toAbsolutePath()
|
||||
.toString();
|
||||
output_path = createTempDirectory(GenerateRAiDActionSetJobTest.class.getSimpleName() + "-")
|
||||
.toAbsolutePath()
|
||||
.toString();
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(GenerateRAiDActionSetJobTest.class.getSimpleName());
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(GenerateRAiDActionSetJobTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", output_path);
|
||||
conf.set("hive.metastore.warehouse.dir", output_path);
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", output_path);
|
||||
conf.set("hive.metastore.warehouse.dir", output_path);
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(GenerateRAiDActionSetJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(GenerateRAiDActionSetJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
static void cleanUp() throws Exception {
|
||||
FileUtils.deleteDirectory(new File(output_path));
|
||||
}
|
||||
@AfterAll
|
||||
static void cleanUp() throws Exception {
|
||||
FileUtils.deleteDirectory(new File(output_path));
|
||||
}
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
void testProcessRAiDEntities() {
|
||||
GenerateRAiDActionSetJob.processRAiDEntities(spark, input_path, output_path + "/test_raid_action_set");
|
||||
@Test
|
||||
@Disabled
|
||||
void testProcessRAiDEntities() {
|
||||
GenerateRAiDActionSetJob.processRAiDEntities(spark, input_path, output_path + "/test_raid_action_set");
|
||||
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<? extends Oaf> result = sc
|
||||
.sequenceFile(output_path + "/test_raid_action_set", Text.class, Text.class)
|
||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(AtomicAction::getPayload);
|
||||
JavaRDD<? extends Oaf> result = sc
|
||||
.sequenceFile(output_path + "/test_raid_action_set", Text.class, Text.class)
|
||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(AtomicAction::getPayload);
|
||||
|
||||
assertEquals(80, result.count());
|
||||
}
|
||||
assertEquals(80, result.count());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPrepareRAiD() {
|
||||
@Test
|
||||
void testPrepareRAiD() {
|
||||
|
||||
List<AtomicAction<? extends Oaf>> atomicActions = GenerateRAiDActionSetJob.prepareRAiD(new RAiDEntity(
|
||||
"-92190526",
|
||||
Arrays.asList("Berli, Justin", "Le Mao, Bérénice", "Guillaume Touya", "Wenclik, Laura", "Courtial, Azelle", "Muehlenhaus, Ian", "Justin Berli", "Touya, Guillaume", "Gruget, Maïeul", "Azelle Courtial", "Ian Muhlenhaus", "Maïeul Gruget", "Marion Dumont", "Maïeul GRUGET", "Cécile Duchêne"),
|
||||
"2021-09-10",
|
||||
"2024-02-16",
|
||||
Arrays.asList("cartography, zoom, pan, desert fog", "Road network", "zooming", "Pan-scalar maps", "pan-scalar map", "Python library", "QGIS", "map design", "landmarks", "Cartes transscalaires", "anchor", "disorientation", "[INFO]Computer Science [cs]", "[SHS.GEO]Humanities and Social Sciences/Geography", "cognitive cartography", "eye-tracking", "Computers in Earth Sciences", "Topographic map", "National Mapping Agency", "General Medicine", "Geography, Planning and Development", "multi-scales", "pan-scalar maps", "Selection", "cartography", "General Earth and Planetary Sciences", "progressiveness", "map generalisation", "Eye-tracker", "zoom", "algorithms", "Map Design", "cartography, map generalisation, zoom, multi-scale map", "Interactive maps", "Map generalisation", "Earth and Planetary Sciences (miscellaneous)", "Cartographic generalization", "rivers", "Benchmark", "General Environmental Science", "open source", "drawing", "Constraint", "Multi-scale maps"),
|
||||
Arrays.asList("Where do people look at during multi-scale map tasks?", "FogDetector survey raw data", "Collection of cartographic disorientation stories", "Anchorwhat dataset", "BasqueRoads: A Benchmark for Road Network Selection", "Progressive river network selection for pan-scalar maps", "BasqueRoads, a dataset to benchmark road selection algorithms", "Missing the city for buildings? A critical review of pan-scalar map generalization and design in contemporary zoomable maps", "Empirical approach to advance the generalisation of multi-scale maps", "L'Alpe d'Huez: a dataset to benchmark topographic map generalisation", "eye-tracking data from a survey on zooming in a pan-scalar map", "Material of the experiment 'More is Less' from the MapMuxing project", "Cartagen4py, an open source Python library for map generalisation", "L’Alpe d’Huez: A Benchmark for Topographic Map Generalisation"),
|
||||
Arrays.asList("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", "50|doi_dedup___::754e3c283639bc6e104c925ff3e34007", "50|doi_dedup___::13517477f3c1261d57a3364363ce6ce0", "50|doi_dedup___::675b16c73accc4e7242bbb4ed9b3724a", "50|doi_dedup___::94ce09906b2d7d37eb2206cea8a50153", "50|dedup_wf_002::cc575d5ca5651ff8c3029a3a76e7e70a", "50|doi_dedup___::c5e52baddda17c755d1bae012a97dc13", "50|doi_dedup___::4f5f38c9e08fe995f7278963183f8ad4", "50|doi_dedup___::a9bc4453273b2d02648a5cb453195042", "50|doi_dedup___::5e893dc0cb7624a33f41c9b428bd59f7", "50|doi_dedup___::c1ecdef48fd9be811a291deed950e1c5", "50|doi_dedup___::9e93c8f2d97c35de8a6a57a5b53ef283", "50|dedup_wf_002::d08be0ed27b13d8a880e891e08d093ea", "50|doi_dedup___::f8d8b3b9eddeca2fc0e3bc9e63996555"),
|
||||
"Exploring Multi-Scale Map Generalization and Design",
|
||||
"This project aims to advance the generalization of multi-scale maps by investigating the impact of different design elements on user experience. The research involves collecting and analyzing data from various sources, including surveys, eye-tracking studies, and user experiments. The goal is to identify best practices for map generalization and design, with a focus on reducing disorientation and improving information retrieval during exploration. The project has led to the development of several datasets, including BasqueRoads, AnchorWhat, and L'Alpe d'Huez, which can be used to benchmark road selection algorithms and topographic map generalization techniques. The research has also resulted in the creation of a Python library, Cartagen4py, for map generalization. The findings of this project have the potential to improve the design and usability of multi-scale maps, making them more effective tools for navigation and information retrieval."
|
||||
));
|
||||
List<AtomicAction<? extends Oaf>> atomicActions = GenerateRAiDActionSetJob
|
||||
.prepareRAiD(
|
||||
new RAiDEntity(
|
||||
"-92190526",
|
||||
Arrays
|
||||
.asList(
|
||||
"Berli, Justin", "Le Mao, Bérénice", "Guillaume Touya", "Wenclik, Laura",
|
||||
"Courtial, Azelle", "Muehlenhaus, Ian", "Justin Berli", "Touya, Guillaume",
|
||||
"Gruget, Maïeul", "Azelle Courtial", "Ian Muhlenhaus", "Maïeul Gruget", "Marion Dumont",
|
||||
"Maïeul GRUGET", "Cécile Duchêne"),
|
||||
"2021-09-10",
|
||||
"2024-02-16",
|
||||
Arrays
|
||||
.asList(
|
||||
"cartography, zoom, pan, desert fog", "Road network", "zooming", "Pan-scalar maps",
|
||||
"pan-scalar map", "Python library", "QGIS", "map design", "landmarks",
|
||||
"Cartes transscalaires", "anchor", "disorientation", "[INFO]Computer Science [cs]",
|
||||
"[SHS.GEO]Humanities and Social Sciences/Geography", "cognitive cartography",
|
||||
"eye-tracking", "Computers in Earth Sciences", "Topographic map", "National Mapping Agency",
|
||||
"General Medicine", "Geography, Planning and Development", "multi-scales",
|
||||
"pan-scalar maps", "Selection", "cartography", "General Earth and Planetary Sciences",
|
||||
"progressiveness", "map generalisation", "Eye-tracker", "zoom", "algorithms", "Map Design",
|
||||
"cartography, map generalisation, zoom, multi-scale map", "Interactive maps",
|
||||
"Map generalisation", "Earth and Planetary Sciences (miscellaneous)",
|
||||
"Cartographic generalization", "rivers", "Benchmark", "General Environmental Science",
|
||||
"open source", "drawing", "Constraint", "Multi-scale maps"),
|
||||
Arrays
|
||||
.asList(
|
||||
"Where do people look at during multi-scale map tasks?", "FogDetector survey raw data",
|
||||
"Collection of cartographic disorientation stories", "Anchorwhat dataset",
|
||||
"BasqueRoads: A Benchmark for Road Network Selection",
|
||||
"Progressive river network selection for pan-scalar maps",
|
||||
"BasqueRoads, a dataset to benchmark road selection algorithms",
|
||||
"Missing the city for buildings? A critical review of pan-scalar map generalization and design in contemporary zoomable maps",
|
||||
"Empirical approach to advance the generalisation of multi-scale maps",
|
||||
"L'Alpe d'Huez: a dataset to benchmark topographic map generalisation",
|
||||
"eye-tracking data from a survey on zooming in a pan-scalar map",
|
||||
"Material of the experiment 'More is Less' from the MapMuxing project",
|
||||
"Cartagen4py, an open source Python library for map generalisation",
|
||||
"L’Alpe d’Huez: A Benchmark for Topographic Map Generalisation"),
|
||||
Arrays
|
||||
.asList(
|
||||
"50|doi_dedup___::6915135e0aa39f913394513f809ae58a",
|
||||
"50|doi_dedup___::754e3c283639bc6e104c925ff3e34007",
|
||||
"50|doi_dedup___::13517477f3c1261d57a3364363ce6ce0",
|
||||
"50|doi_dedup___::675b16c73accc4e7242bbb4ed9b3724a",
|
||||
"50|doi_dedup___::94ce09906b2d7d37eb2206cea8a50153",
|
||||
"50|dedup_wf_002::cc575d5ca5651ff8c3029a3a76e7e70a",
|
||||
"50|doi_dedup___::c5e52baddda17c755d1bae012a97dc13",
|
||||
"50|doi_dedup___::4f5f38c9e08fe995f7278963183f8ad4",
|
||||
"50|doi_dedup___::a9bc4453273b2d02648a5cb453195042",
|
||||
"50|doi_dedup___::5e893dc0cb7624a33f41c9b428bd59f7",
|
||||
"50|doi_dedup___::c1ecdef48fd9be811a291deed950e1c5",
|
||||
"50|doi_dedup___::9e93c8f2d97c35de8a6a57a5b53ef283",
|
||||
"50|dedup_wf_002::d08be0ed27b13d8a880e891e08d093ea",
|
||||
"50|doi_dedup___::f8d8b3b9eddeca2fc0e3bc9e63996555"),
|
||||
"Exploring Multi-Scale Map Generalization and Design",
|
||||
"This project aims to advance the generalization of multi-scale maps by investigating the impact of different design elements on user experience. The research involves collecting and analyzing data from various sources, including surveys, eye-tracking studies, and user experiments. The goal is to identify best practices for map generalization and design, with a focus on reducing disorientation and improving information retrieval during exploration. The project has led to the development of several datasets, including BasqueRoads, AnchorWhat, and L'Alpe d'Huez, which can be used to benchmark road selection algorithms and topographic map generalization techniques. The research has also resulted in the creation of a Python library, Cartagen4py, for map generalization. The findings of this project have the potential to improve the design and usability of multi-scale maps, making them more effective tools for navigation and information retrieval."));
|
||||
|
||||
OtherResearchProduct orp = (OtherResearchProduct) atomicActions.get(0).getPayload();
|
||||
Relation rel = (Relation) atomicActions.get(1).getPayload();
|
||||
OtherResearchProduct orp = (OtherResearchProduct) atomicActions.get(0).getPayload();
|
||||
Relation rel = (Relation) atomicActions.get(1).getPayload();
|
||||
|
||||
assertEquals("Exploring Multi-Scale Map Generalization and Design", orp.getTitle().get(0).getValue());
|
||||
assertEquals("50|raid________::759a564ce5cc7360cab030c517c7366b", rel.getSource());
|
||||
assertEquals("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", rel.getTarget());
|
||||
assertEquals("Exploring Multi-Scale Map Generalization and Design", orp.getTitle().get(0).getValue());
|
||||
assertEquals("50|raid________::759a564ce5cc7360cab030c517c7366b", rel.getSource());
|
||||
assertEquals("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", rel.getTarget());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -63,7 +63,6 @@ class BioScholixTest extends AbstractVocabularyTest {
|
|||
"0000000333457333",
|
||||
"0000000335964515",
|
||||
"0000000302921949",
|
||||
|
||||
"http://orcid.org/0000-0001-8567-3543",
|
||||
"http://orcid.org/0000-0001-7868-8528",
|
||||
"0000-0001-9189-1440",
|
||||
|
|
|
@ -130,7 +130,7 @@ public class ResultTagger implements Serializable {
|
|||
// log.info("Remove constraints for " + communityId);
|
||||
if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
|
||||
conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
|
||||
!conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() &&
|
||||
!conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() &&
|
||||
conf
|
||||
.getRemoveConstraintsMap()
|
||||
.get(communityId)
|
||||
|
@ -228,7 +228,7 @@ public class ResultTagger implements Serializable {
|
|||
.forEach(communityId -> {
|
||||
if (!removeCommunities.contains(communityId) &&
|
||||
conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
|
||||
!conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() &&
|
||||
!conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() &&
|
||||
conf
|
||||
.getSelectionConstraintsMap()
|
||||
.get(communityId)
|
||||
|
|
|
@ -915,7 +915,8 @@ class MappersTest {
|
|||
|
||||
@Test
|
||||
void testODFRecord_guidelines4() throws IOException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml")));
|
||||
final String xml = IOUtils
|
||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
|
||||
final Publication p = (Publication) list.get(0);
|
||||
|
|
|
@ -5,7 +5,6 @@ import java.io.StringReader;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.solr.PersonTopic;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
|
@ -40,6 +39,7 @@ import eu.dnetlib.dhp.schema.solr.OpenAccessColor;
|
|||
import eu.dnetlib.dhp.schema.solr.OpenAccessRoute;
|
||||
import eu.dnetlib.dhp.schema.solr.Organization;
|
||||
import eu.dnetlib.dhp.schema.solr.Person;
|
||||
import eu.dnetlib.dhp.schema.solr.PersonTopic;
|
||||
import eu.dnetlib.dhp.schema.solr.Pid;
|
||||
import eu.dnetlib.dhp.schema.solr.Project;
|
||||
import eu.dnetlib.dhp.schema.solr.Result;
|
||||
|
@ -216,11 +216,14 @@ public class ProvisionModelSupport {
|
|||
}
|
||||
|
||||
private static List<PersonTopic> mapPersonTopics(List<eu.dnetlib.dhp.schema.oaf.PersonTopic> subjects) {
|
||||
return Optional.ofNullable(subjects)
|
||||
.map(ss -> ss.stream()
|
||||
.map(ProvisionModelSupport::mapPersonTopic)
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(null);
|
||||
return Optional
|
||||
.ofNullable(subjects)
|
||||
.map(
|
||||
ss -> ss
|
||||
.stream()
|
||||
.map(ProvisionModelSupport::mapPersonTopic)
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
private static PersonTopic mapPersonTopic(eu.dnetlib.dhp.schema.oaf.PersonTopic pt) {
|
||||
|
|
Loading…
Reference in New Issue