forked from D-Net/dnet-hadoop
refactoring
This commit is contained in:
parent
c15c8c0ad0
commit
8a4f84f8c0
|
@ -61,7 +61,7 @@
|
|||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaire-broker-common</artifactId>
|
||||
<version>[2.0.1,3.0.0)</version>
|
||||
<version>[3.0.1,4.0.0)</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<dfxml xmloutputversion='1.0'>
|
||||
<metadata
|
||||
xmlns='http://afflib.org/tcpflow/'
|
||||
xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'
|
||||
xmlns:dc='http://purl.org/dc/elements/1.1/'>
|
||||
<dc:type>Feature Extraction</dc:type>
|
||||
</metadata>
|
||||
<creator version='1.0'>
|
||||
<program>TCPFLOW</program>
|
||||
<version>1.5.0</version>
|
||||
<build_environment>
|
||||
<compiler>4.2.1 (4.2.1 Compatible Apple LLVM 11.0.0 (clang-1100.0.33.8))</compiler>
|
||||
<CPPFLAGS>-D_THREAD_SAFE -pthread -I/usr/local/include -I/usr/local/include -DUTC_OFFSET=+0000 </CPPFLAGS>
|
||||
<CFLAGS>-g -D_THREAD_SAFE -pthread -g -O3 -MD -Wpointer-arith -Wmissing-declarations -Wmissing-prototypes -Wshadow -Wwrite-strings -Wcast-align -Waggregate-return -Wbad-function-cast -Wcast-qual -Wundef -Wredundant-decls -Wdisabled-optimization -Wfloat-equal -Wmultichar -Wc++-compat -Wmissing-noreturn -Wall -Wstrict-prototypes -MD -D_FORTIFY_SOURCE=2 -Wpointer-arith -Wmissing-declarations -Wmissing-prototypes -Wshadow -Wwrite-strings -Wcast-align -Waggregate-return -Wbad-function-cast -Wcast-qual -Wundef -Wredundant-decls -Wdisabled-optimization -Wfloat-equal -Wmultichar -Wc++-compat -Wmissing-noreturn -Wall -Wstrict-prototypes</CFLAGS>
|
||||
<CXXFLAGS>-g -D_THREAD_SAFE -pthread -g -O3 -Wall -MD -D_FORTIFY_SOURCE=2 -Wpointer-arith -Wshadow -Wwrite-strings -Wcast-align -Wredundant-decls -Wdisabled-optimization -Wfloat-equal -Wmultichar -Wmissing-noreturn -Woverloaded-virtual -Wsign-promo -funit-at-a-time -Weffc++ -std=c++11 -Wall -MD -D_FORTIFY_SOURCE=2 -Wpointer-arith -Wshadow -Wwrite-strings -Wcast-align -Wredundant-decls -Wdisabled-optimization -Wfloat-equal -Wmultichar -Wmissing-noreturn -Woverloaded-virtual -Wsign-promo -funit-at-a-time -Weffc++ </CXXFLAGS>
|
||||
<LDFLAGS>-L/usr/local/lib -L/usr/local/lib </LDFLAGS>
|
||||
<LIBS>-lpython2.7 -lpython2.7 -lpcap -lbz2 -lexpat -lsqlite3 -lcrypto -lssl -lcrypto -ldl -lz </LIBS>
|
||||
<compilation_date>2019-10-11T01:16:58</compilation_date>
|
||||
<library name="boost" version="107100"/>
|
||||
<library name="sqlite" version="3.28.0" source_id="2019-04-15 14:49:49 378230ae7f4b721c8b8d83c8ceb891449685cd23b1702a57841f1be40b5daapl"/>
|
||||
</build_environment>
|
||||
<execution_environment>
|
||||
<os_sysname>Darwin</os_sysname>
|
||||
<os_release>19.5.0</os_release>
|
||||
<os_version>Darwin Kernel Version 19.5.0: Tue May 26 20:41:44 PDT 2020; root:xnu-6153.121.2~2/RELEASE_X86_64</os_version>
|
||||
<host>Micheles-MBP.local</host>
|
||||
<arch>x86_64</arch>
|
||||
<command_line>tcpflow</command_line>
|
||||
<uid>501</uid>
|
||||
<username>michele</username>
|
||||
<start_time>2020-06-15T14:55:03Z</start_time>
|
||||
</execution_environment>
|
||||
</creator>
|
||||
<configuration>
|
||||
</configuration>
|
||||
<tdelta>0</tdelta>
|
|
@ -1,9 +1,15 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
public class Event {
|
||||
public class Event implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -5936790326505781395L;
|
||||
|
||||
private String eventId;
|
||||
|
||||
|
|
|
@ -6,17 +6,13 @@ import java.util.Date;
|
|||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class EventFactory {
|
||||
|
||||
|
@ -37,8 +33,7 @@ public class EventFactory {
|
|||
final Map<String, Object> map = createMapFromResult(updateInfo);
|
||||
|
||||
final String eventId = calculateEventId(
|
||||
updateInfo.getTopicPath(), updateInfo.getTarget().getResult().getOriginalId().get(0),
|
||||
updateInfo.getHighlightValueAsString());
|
||||
updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId(), updateInfo.getHighlightValueAsString());
|
||||
|
||||
res.setEventId(eventId);
|
||||
res.setProducerId(PRODUCER_ID);
|
||||
|
@ -54,53 +49,31 @@ public class EventFactory {
|
|||
private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) {
|
||||
final Map<String, Object> map = new HashMap<>();
|
||||
|
||||
final Result source = updateInfo.getSource().getResult();
|
||||
final Result target = updateInfo.getTarget().getResult();
|
||||
final OpenaireBrokerResult source = updateInfo.getSource();
|
||||
final OpenaireBrokerResult target = updateInfo.getTarget();
|
||||
|
||||
final List<KeyValue> collectedFrom = target.getCollectedfrom();
|
||||
if (collectedFrom.size() == 1) {
|
||||
map.put("target_datasource_id", collectedFrom.get(0).getKey());
|
||||
map.put("target_datasource_name", collectedFrom.get(0).getValue());
|
||||
}
|
||||
map.put("target_datasource_id", target.getCollectedFromId());
|
||||
map.put("target_datasource_name", target.getCollectedFromName());
|
||||
|
||||
final List<String> ids = target.getOriginalId();
|
||||
if (ids.size() > 0) {
|
||||
map.put("target_publication_id", ids.get(0));
|
||||
}
|
||||
map.put("target_publication_id", target.getOriginalId());
|
||||
|
||||
final List<StructuredProperty> titles = target.getTitle();
|
||||
final List<String> titles = target.getTitles();
|
||||
if (titles.size() > 0) {
|
||||
map.put("target_publication_title", titles.get(0));
|
||||
}
|
||||
|
||||
final long date = parseDateTolong(target.getDateofacceptance().getValue());
|
||||
final long date = parseDateTolong(target.getPublicationdate());
|
||||
if (date > 0) {
|
||||
map.put("target_dateofacceptance", date);
|
||||
}
|
||||
|
||||
final List<StructuredProperty> subjects = target.getSubject();
|
||||
if (subjects.size() > 0) {
|
||||
map
|
||||
.put(
|
||||
"target_publication_subject_list",
|
||||
subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
final List<Author> authors = target.getAuthor();
|
||||
if (authors.size() > 0) {
|
||||
map
|
||||
.put(
|
||||
"target_publication_author_list",
|
||||
authors.stream().map(Author::getFullname).collect(Collectors.toList()));
|
||||
}
|
||||
map.put("target_publication_subject_list", target.getSubjects());
|
||||
map.put("target_publication_author_list", target.getCreators());
|
||||
|
||||
// PROVENANCE INFO
|
||||
map.put("trust", updateInfo.getTrust());
|
||||
final List<KeyValue> sourceCollectedFrom = source.getCollectedfrom();
|
||||
if (sourceCollectedFrom.size() == 1) {
|
||||
map.put("provenance_datasource_id", sourceCollectedFrom.get(0).getKey());
|
||||
map.put("provenance_datasource_name", sourceCollectedFrom.get(0).getValue());
|
||||
}
|
||||
map.put("provenance_datasource_id", source.getCollectedFromId());
|
||||
map.put("provenance_datasource_name", source.getCollectedFromName());
|
||||
map.put("provenance_publication_id_list", source.getOriginalId());
|
||||
|
||||
return map;
|
||||
|
|
|
@ -18,18 +18,20 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.broker.model.Event;
|
||||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.EventFinder;
|
||||
import eu.dnetlib.dhp.broker.oa.util.EventGroup;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultAggregator;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.OpenaireBrokerResultAggregator;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedEntityFactory;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelationsAggregator;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
@ -73,6 +75,8 @@ public class GenerateEventsApplication {
|
|||
log.info("dedupConfigProfileId: {}", dedupConfigProfileId);
|
||||
|
||||
final SparkConf conf = new SparkConf();
|
||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||
conf.registerKryoClasses(BrokerConstants.getModelClasses());
|
||||
|
||||
final DedupConfig dedupConfig = loadDedupConfig(isLookupUrl, dedupConfigProfileId);
|
||||
|
||||
|
@ -80,13 +84,16 @@ public class GenerateEventsApplication {
|
|||
|
||||
removeOutputDir(spark, eventsPath);
|
||||
|
||||
final Dataset<Event> all = spark.emptyDataset(Encoders.kryo(Event.class));
|
||||
|
||||
for (final Class<? extends Result> r1 : BrokerConstants.RESULT_CLASSES) {
|
||||
all.union(generateEvents(spark, graphPath, r1, dedupConfig));
|
||||
}
|
||||
|
||||
all.write().mode(SaveMode.Overwrite).option("compression", "gzip").json(eventsPath);
|
||||
spark
|
||||
.emptyDataset(Encoders.kryo(Event.class))
|
||||
.union(generateEvents(spark, graphPath, Publication.class, dedupConfig))
|
||||
.union(generateEvents(spark, graphPath, eu.dnetlib.dhp.schema.oaf.Dataset.class, dedupConfig))
|
||||
.union(generateEvents(spark, graphPath, Software.class, dedupConfig))
|
||||
.union(generateEvents(spark, graphPath, OtherResearchProduct.class, dedupConfig))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(eventsPath);
|
||||
});
|
||||
|
||||
}
|
||||
|
@ -101,18 +108,18 @@ public class GenerateEventsApplication {
|
|||
final Class<SRC> sourceClass,
|
||||
final DedupConfig dedupConfig) {
|
||||
|
||||
final Dataset<ResultWithRelations> results = expandResultsWithRelations(spark, graphPath, sourceClass);
|
||||
final Dataset<OpenaireBrokerResult> results = expandResultsWithRelations(spark, graphPath, sourceClass);
|
||||
|
||||
final Dataset<Relation> mergedRels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
|
||||
|
||||
final TypedColumn<Tuple2<ResultWithRelations, Relation>, ResultGroup> aggr = new ResultAggregator()
|
||||
final TypedColumn<Tuple2<OpenaireBrokerResult, Relation>, ResultGroup> aggr = new ResultAggregator()
|
||||
.toColumn();
|
||||
|
||||
return results
|
||||
.joinWith(mergedRels, results.col("result.id").equalTo(mergedRels.col("source")), "inner")
|
||||
.groupByKey(
|
||||
(MapFunction<Tuple2<ResultWithRelations, Relation>, String>) t -> t._2.getTarget(), Encoders.STRING())
|
||||
(MapFunction<Tuple2<OpenaireBrokerResult, Relation>, String>) t -> t._2.getTarget(), Encoders.STRING())
|
||||
.agg(aggr)
|
||||
.map((MapFunction<Tuple2<String, ResultGroup>, ResultGroup>) t -> t._2, Encoders.kryo(ResultGroup.class))
|
||||
.filter(ResultGroup::isValid)
|
||||
|
@ -122,7 +129,7 @@ public class GenerateEventsApplication {
|
|||
.flatMap(group -> group.getData().iterator(), Encoders.kryo(Event.class));
|
||||
}
|
||||
|
||||
private static <SRC extends Result> Dataset<ResultWithRelations> expandResultsWithRelations(
|
||||
private static <SRC extends Result> Dataset<OpenaireBrokerResult> expandResultsWithRelations(
|
||||
final SparkSession spark,
|
||||
final String graphPath,
|
||||
final Class<SRC> sourceClass) {
|
||||
|
@ -135,14 +142,15 @@ public class GenerateEventsApplication {
|
|||
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
|
||||
|
||||
final Dataset<ResultWithRelations> r0 = readPath(
|
||||
final Dataset<OpenaireBrokerResult> r0 = readPath(
|
||||
spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), Result.class)
|
||||
.filter(r -> r.getDataInfo().getDeletedbyinference())
|
||||
.map(r -> new ResultWithRelations(r), Encoders.kryo(ResultWithRelations.class));
|
||||
final Dataset<ResultWithRelations> r1 = join(r0, rels, relatedEntities(projects, rels, RelatedProject.class));
|
||||
final Dataset<ResultWithRelations> r2 = join(r1, rels, relatedEntities(softwares, rels, RelatedProject.class));
|
||||
final Dataset<ResultWithRelations> r3 = join(r2, rels, relatedEntities(datasets, rels, RelatedProject.class));
|
||||
final Dataset<ResultWithRelations> r4 = join(
|
||||
.map(ConversionUtils::oafResultToBrokerResult, Encoders.kryo(OpenaireBrokerResult.class));
|
||||
|
||||
final Dataset<OpenaireBrokerResult> r1 = join(r0, rels, relatedEntities(projects, rels, RelatedProject.class));
|
||||
final Dataset<OpenaireBrokerResult> r2 = join(r1, rels, relatedEntities(softwares, rels, RelatedProject.class));
|
||||
final Dataset<OpenaireBrokerResult> r3 = join(r2, rels, relatedEntities(datasets, rels, RelatedProject.class));
|
||||
final Dataset<OpenaireBrokerResult> r4 = join(
|
||||
r3, rels, relatedEntities(publications, rels, RelatedProject.class));
|
||||
;
|
||||
|
||||
|
@ -159,20 +167,20 @@ public class GenerateEventsApplication {
|
|||
Encoders.kryo(clazz));
|
||||
}
|
||||
|
||||
private static <T> Dataset<ResultWithRelations> join(final Dataset<ResultWithRelations> sources,
|
||||
private static <T> Dataset<OpenaireBrokerResult> join(final Dataset<OpenaireBrokerResult> sources,
|
||||
final Dataset<Relation> rels,
|
||||
final Dataset<T> typedRels) {
|
||||
|
||||
final TypedColumn<Tuple2<ResultWithRelations, T>, ResultWithRelations> aggr = new ResultWithRelationsAggregator<T>()
|
||||
final TypedColumn<Tuple2<OpenaireBrokerResult, T>, OpenaireBrokerResult> aggr = new OpenaireBrokerResultAggregator<T>()
|
||||
.toColumn();
|
||||
;
|
||||
|
||||
return sources
|
||||
.joinWith(typedRels, sources.col("result.id").equalTo(rels.col("source")), "left_outer")
|
||||
.groupByKey(
|
||||
(MapFunction<Tuple2<ResultWithRelations, T>, String>) t -> t._1.getResult().getId(), Encoders.STRING())
|
||||
(MapFunction<Tuple2<OpenaireBrokerResult, T>, String>) t -> t._1.getOpenaireId(), Encoders.STRING())
|
||||
.agg(aggr)
|
||||
.map(t -> t._2, Encoders.kryo(ResultWithRelations.class));
|
||||
.map(t -> t._2, Encoders.kryo(OpenaireBrokerResult.class));
|
||||
}
|
||||
|
||||
public static <R> Dataset<R> readPath(
|
||||
|
|
|
@ -12,22 +12,20 @@ import java.util.function.Function;
|
|||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.broker.objects.Publication;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
|
||||
public abstract class UpdateMatcher<T> {
|
||||
|
||||
private final boolean multipleUpdate;
|
||||
private final Function<T, Topic> topicFunction;
|
||||
private final BiConsumer<Publication, T> compileHighlightFunction;
|
||||
private final BiConsumer<OpenaireBrokerResult, T> compileHighlightFunction;
|
||||
private final Function<T, String> highlightToStringFunction;
|
||||
|
||||
public UpdateMatcher(final boolean multipleUpdate, final Function<T, Topic> topicFunction,
|
||||
final BiConsumer<Publication, T> compileHighlightFunction,
|
||||
final BiConsumer<OpenaireBrokerResult, T> compileHighlightFunction,
|
||||
final Function<T, String> highlightToStringFunction) {
|
||||
this.multipleUpdate = multipleUpdate;
|
||||
this.topicFunction = topicFunction;
|
||||
|
@ -35,19 +33,18 @@ public abstract class UpdateMatcher<T> {
|
|||
this.highlightToStringFunction = highlightToStringFunction;
|
||||
}
|
||||
|
||||
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final ResultWithRelations res,
|
||||
final Collection<ResultWithRelations> others,
|
||||
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final OpenaireBrokerResult res,
|
||||
final Collection<OpenaireBrokerResult> others,
|
||||
final DedupConfig dedupConfig) {
|
||||
|
||||
final Map<String, UpdateInfo<T>> infoMap = new HashMap<>();
|
||||
|
||||
for (final ResultWithRelations source : others) {
|
||||
for (final OpenaireBrokerResult source : others) {
|
||||
if (source != res) {
|
||||
for (final T hl : findDifferences(source, res)) {
|
||||
final Topic topic = getTopicFunction().apply(hl);
|
||||
final UpdateInfo<T> info = new UpdateInfo<>(topic, hl, source, res, getCompileHighlightFunction(),
|
||||
getHighlightToStringFunction(),
|
||||
dedupConfig);
|
||||
getHighlightToStringFunction(), dedupConfig);
|
||||
final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
|
||||
if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {
|
||||
} else {
|
||||
|
@ -71,14 +68,14 @@ public abstract class UpdateMatcher<T> {
|
|||
}
|
||||
}
|
||||
|
||||
protected abstract List<T> findDifferences(ResultWithRelations source, ResultWithRelations target);
|
||||
protected abstract List<T> findDifferences(OpenaireBrokerResult source, OpenaireBrokerResult target);
|
||||
|
||||
protected static boolean isMissing(final List<Field<String>> list) {
|
||||
return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue());
|
||||
protected static boolean isMissing(final List<String> list) {
|
||||
return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0));
|
||||
}
|
||||
|
||||
protected boolean isMissing(final Field<String> field) {
|
||||
return field == null || StringUtils.isBlank(field.getValue());
|
||||
protected boolean isMissing(final String field) {
|
||||
return StringUtils.isBlank(field);
|
||||
}
|
||||
|
||||
public boolean isMultipleUpdate() {
|
||||
|
@ -89,7 +86,7 @@ public abstract class UpdateMatcher<T> {
|
|||
return topicFunction;
|
||||
}
|
||||
|
||||
public BiConsumer<Publication, T> getCompileHighlightFunction() {
|
||||
public BiConsumer<OpenaireBrokerResult, T> getCompileHighlightFunction() {
|
||||
return compileHighlightFunction;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,45 +5,39 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.Dataset;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedDataset;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
|
||||
public abstract class AbstractEnrichMissingDataset
|
||||
extends UpdateMatcher<eu.dnetlib.broker.objects.Dataset> {
|
||||
extends UpdateMatcher<Dataset> {
|
||||
|
||||
public AbstractEnrichMissingDataset(final Topic topic) {
|
||||
super(true,
|
||||
rel -> topic,
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
protected abstract boolean filterByType(String relType);
|
||||
|
||||
@Override
|
||||
protected final List<eu.dnetlib.broker.objects.Dataset> findDifferences(
|
||||
final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
protected final List<Dataset> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
|
||||
final Set<String> existingDatasets = target
|
||||
.getDatasets()
|
||||
.stream()
|
||||
.filter(rel -> filterByType(rel.getRelType()))
|
||||
.map(RelatedDataset::getRelDataset)
|
||||
.map(Dataset::getId)
|
||||
.map(Dataset::getOriginalId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return source
|
||||
.getDatasets()
|
||||
.stream()
|
||||
.filter(rel -> filterByType(rel.getRelType()))
|
||||
.map(RelatedDataset::getRelDataset)
|
||||
.filter(d -> !existingDatasets.contains(d.getId()))
|
||||
.map(ConversionUtils::oafDatasetToBrokerDataset)
|
||||
.filter(d -> !existingDatasets.contains(d.getOriginalId()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
}
|
||||
|
|
|
@ -1,19 +1,15 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.Project;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMissingProject
|
||||
extends UpdateMatcher<eu.dnetlib.broker.objects.Project> {
|
||||
public class EnrichMissingProject extends UpdateMatcher<Project> {
|
||||
|
||||
public EnrichMissingProject() {
|
||||
super(true,
|
||||
|
@ -23,16 +19,11 @@ public class EnrichMissingProject
|
|||
}
|
||||
|
||||
@Override
|
||||
protected List<Project> findDifferences(final ResultWithRelations source, final ResultWithRelations target) {
|
||||
if (source.getProjects().isEmpty()) {
|
||||
return Arrays.asList();
|
||||
protected List<Project> findDifferences(final OpenaireBrokerResult source, final OpenaireBrokerResult target) {
|
||||
if (target.getProjects().isEmpty()) {
|
||||
return source.getProjects();
|
||||
} else {
|
||||
return target
|
||||
.getProjects()
|
||||
.stream()
|
||||
.map(RelatedProject::getRelProject)
|
||||
.map(ConversionUtils::oafProjectToBrokerProject)
|
||||
.collect(Collectors.toList());
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,39 +5,38 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.Project;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
|
||||
public class EnrichMoreProject extends UpdateMatcher<eu.dnetlib.broker.objects.Project> {
|
||||
public class EnrichMoreProject extends UpdateMatcher<Project> {
|
||||
|
||||
public EnrichMoreProject() {
|
||||
super(true,
|
||||
prj -> Topic.ENRICH_MORE_PROJECT,
|
||||
(p, prj) -> p.getProjects().add(prj),
|
||||
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
|
||||
prj -> projectAsString(prj));
|
||||
}
|
||||
|
||||
private static String projectAsString(final Project prj) {
|
||||
return prj.getFunder() + "::" + prj.getFundingProgram() + "::" + prj.getCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<eu.dnetlib.broker.objects.Project> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
protected List<eu.dnetlib.broker.objects.Project> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
|
||||
final Set<String> existingProjects = source
|
||||
final Set<String> existingProjects = target
|
||||
.getProjects()
|
||||
.stream()
|
||||
.map(RelatedProject::getRelProject)
|
||||
.map(Project::getId)
|
||||
.map(EnrichMoreProject::projectAsString)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return target
|
||||
return source
|
||||
.getProjects()
|
||||
.stream()
|
||||
.map(RelatedProject::getRelProject)
|
||||
.filter(p -> !existingProjects.contains(p.getId()))
|
||||
.map(ConversionUtils::oafProjectToBrokerProject)
|
||||
.filter(p -> !existingProjects.contains(projectAsString(p)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
|
|
@ -5,21 +5,18 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.Publication;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedPublication;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
|
||||
public abstract class AbstractEnrichMissingPublication
|
||||
extends UpdateMatcher<eu.dnetlib.broker.objects.Publication> {
|
||||
public abstract class AbstractEnrichMissingPublication extends UpdateMatcher<Publication> {
|
||||
|
||||
public AbstractEnrichMissingPublication(final Topic topic) {
|
||||
super(true,
|
||||
rel -> topic,
|
||||
(p, rel) -> p.getPublications().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
rel -> rel.getOriginalId());
|
||||
|
||||
}
|
||||
|
||||
|
@ -27,24 +24,21 @@ public abstract class AbstractEnrichMissingPublication
|
|||
|
||||
@Override
|
||||
protected final List<eu.dnetlib.broker.objects.Publication> findDifferences(
|
||||
final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
|
||||
final Set<String> existingPublications = target
|
||||
.getPublications()
|
||||
.stream()
|
||||
.filter(rel -> filterByType(rel.getRelType()))
|
||||
.map(RelatedPublication::getRelPublication)
|
||||
.map(Publication::getId)
|
||||
.map(Publication::getOriginalId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return source
|
||||
.getPublications()
|
||||
.stream()
|
||||
.filter(rel -> filterByType(rel.getRelType()))
|
||||
.map(RelatedPublication::getRelPublication)
|
||||
.filter(d -> !existingPublications.contains(d.getId()))
|
||||
.map(ConversionUtils::oafResultToBrokerPublication)
|
||||
.filter(p -> !existingPublications.contains(p.getOriginalId()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
|
|
@ -1,15 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMissingSoftware
|
||||
extends UpdateMatcher<eu.dnetlib.broker.objects.Software> {
|
||||
|
@ -23,18 +20,13 @@ public class EnrichMissingSoftware
|
|||
|
||||
@Override
|
||||
protected List<eu.dnetlib.broker.objects.Software> findDifferences(
|
||||
final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
|
||||
if (source.getSoftwares().isEmpty()) {
|
||||
return Arrays.asList();
|
||||
if (target.getSoftwares().isEmpty()) {
|
||||
return source.getSoftwares();
|
||||
} else {
|
||||
return target
|
||||
.getSoftwares()
|
||||
.stream()
|
||||
.map(RelatedSoftware::getRelSoftware)
|
||||
.map(ConversionUtils::oafSoftwareToBrokerSoftware)
|
||||
.collect(Collectors.toList());
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,15 +5,12 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.Software;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
||||
public class EnrichMoreSoftware
|
||||
extends UpdateMatcher<eu.dnetlib.broker.objects.Software> {
|
||||
public class EnrichMoreSoftware extends UpdateMatcher<Software> {
|
||||
|
||||
public EnrichMoreSoftware() {
|
||||
super(true,
|
||||
|
@ -24,22 +21,19 @@ public class EnrichMoreSoftware
|
|||
|
||||
@Override
|
||||
protected List<eu.dnetlib.broker.objects.Software> findDifferences(
|
||||
final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
|
||||
final Set<String> existingSoftwares = source
|
||||
.getSoftwares()
|
||||
.stream()
|
||||
.map(RelatedSoftware::getRelSoftware)
|
||||
.map(Software::getId)
|
||||
.map(Software::getName)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return target
|
||||
.getSoftwares()
|
||||
.stream()
|
||||
.map(RelatedSoftware::getRelSoftware)
|
||||
.filter(p -> !existingSoftwares.contains(p.getId()))
|
||||
.map(ConversionUtils::oafSoftwareToBrokerSoftware)
|
||||
.filter(p -> !existingSoftwares.contains(p.getName()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
|
|
@ -5,9 +5,9 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
||||
|
||||
|
@ -19,13 +19,12 @@ public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected List<String> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
if (isMissing(target.getResult().getDescription()) && !isMissing(source.getResult().getDescription())) {
|
||||
return Arrays
|
||||
.asList(source.getResult().getDescription().get(0).getValue());
|
||||
}
|
||||
protected List<String> findDifferences(final OpenaireBrokerResult source, final OpenaireBrokerResult target) {
|
||||
if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
|
||||
return Arrays.asList(source.getAbstracts().get(0));
|
||||
} else {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,53 +1,43 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.broker.objects.Author;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> {
|
||||
public class EnrichMissingAuthorOrcid extends UpdateMatcher<Author> {
|
||||
|
||||
public EnrichMissingAuthorOrcid() {
|
||||
super(true,
|
||||
aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID,
|
||||
(p, aut) -> p.getCreators().add(aut),
|
||||
aut -> aut);
|
||||
aut -> aut.getOrcid());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
protected List<Author> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
|
||||
final Set<String> existingOrcids = target
|
||||
.getResult()
|
||||
.getAuthor()
|
||||
.getCreators()
|
||||
.stream()
|
||||
.map(Author::getPid)
|
||||
.flatMap(List::stream)
|
||||
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid"))
|
||||
.map(pid -> pid.getValue())
|
||||
.map(Author::getOrcid)
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
final List<String> list = new ArrayList<>();
|
||||
return source
|
||||
.getCreators()
|
||||
.stream()
|
||||
.filter(a -> StringUtils.isNotBlank(a.getOrcid()))
|
||||
.filter(a -> !existingOrcids.contains(a.getOrcid()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
for (final Author author : source.getResult().getAuthor()) {
|
||||
final String name = author.getFullname();
|
||||
|
||||
for (final StructuredProperty pid : author.getPid()) {
|
||||
if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid")
|
||||
&& !existingOrcids.contains(pid.getValue())) {
|
||||
list.add(name + " [ORCID: " + pid.getValue() + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return list;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,11 +6,10 @@ import java.util.List;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.Instance;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
|
||||
|
||||
|
@ -22,13 +21,12 @@ public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected List<Instance> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
protected List<Instance> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
final long count = target
|
||||
.getResult()
|
||||
.getInstance()
|
||||
.getInstances()
|
||||
.stream()
|
||||
.map(i -> i.getAccessright().getClassid())
|
||||
.map(Instance::getLicense)
|
||||
.filter(right -> right.equals(BrokerConstants.OPEN_ACCESS))
|
||||
.count();
|
||||
|
||||
|
@ -37,12 +35,9 @@ public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
|
|||
}
|
||||
|
||||
return source
|
||||
.getResult()
|
||||
.getInstance()
|
||||
.getInstances()
|
||||
.stream()
|
||||
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
|
||||
.map(ConversionUtils::oafInstanceToBrokerInstances)
|
||||
.flatMap(List::stream)
|
||||
.filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
|
|
@ -5,13 +5,12 @@ import java.util.Arrays;
|
|||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.Pid;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.TypedValue;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMissingPid extends UpdateMatcher<Pid> {
|
||||
public class EnrichMissingPid extends UpdateMatcher<TypedValue> {
|
||||
|
||||
public EnrichMissingPid() {
|
||||
super(true,
|
||||
|
@ -21,19 +20,17 @@ public class EnrichMissingPid extends UpdateMatcher<Pid> {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected List<Pid> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
final long count = target.getResult().getPid().size();
|
||||
protected List<TypedValue> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
final long count = target.getPids().size();
|
||||
|
||||
if (count > 0) {
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
return source
|
||||
.getResult()
|
||||
.getPid()
|
||||
.getPids()
|
||||
.stream()
|
||||
.map(ConversionUtils::oafPidToBrokerPid)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
|
|
@ -5,9 +5,9 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
|
||||
|
||||
|
@ -19,13 +19,14 @@ public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected List<String> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
if (isMissing(target.getResult().getDateofacceptance())
|
||||
&& !isMissing(source.getResult().getDateofacceptance())) {
|
||||
return Arrays.asList(source.getResult().getDateofacceptance().getValue());
|
||||
}
|
||||
protected List<String> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
|
||||
if (isMissing(target.getPublicationdate()) && !isMissing(source.getPublicationdate())) {
|
||||
return Arrays.asList(source.getPublicationdate());
|
||||
} else {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -5,42 +5,38 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.TypedValue;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
|
||||
public class EnrichMissingSubject extends UpdateMatcher<TypedValue> {
|
||||
|
||||
public EnrichMissingSubject() {
|
||||
super(true,
|
||||
pair -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + pair.getLeft()),
|
||||
(p, pair) -> p.getSubjects().add(pair.getRight()),
|
||||
pair -> pair.getLeft() + "::" + pair.getRight());
|
||||
s -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + s.getType()),
|
||||
(p, s) -> p.getSubjects().add(s),
|
||||
s -> subjectAsString(s));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<Pair<String, String>> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
final Set<String> existingTypes = target
|
||||
.getResult()
|
||||
.getSubject()
|
||||
protected List<TypedValue> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
final Set<String> existingSubject = target
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.map(StructuredProperty::getQualifier)
|
||||
.map(Qualifier::getClassid)
|
||||
.map(s -> subjectAsString(s))
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return source
|
||||
.getResult()
|
||||
.getPid()
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.filter(pid -> !existingTypes.contains(pid.getQualifier().getClassid()))
|
||||
.map(ConversionUtils::oafSubjectToPair)
|
||||
.filter(s -> !existingSubject.contains(subjectAsString(s)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private static String subjectAsString(final TypedValue s) {
|
||||
return s.getType() + "::" + s.getValue();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -6,11 +6,10 @@ import java.util.Set;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.Instance;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
|
||||
|
||||
|
@ -22,24 +21,19 @@ public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected List<Instance> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
protected List<Instance> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
final Set<String> urls = target
|
||||
.getResult()
|
||||
.getInstance()
|
||||
.getInstances()
|
||||
.stream()
|
||||
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
|
||||
.filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS))
|
||||
.map(i -> i.getUrl())
|
||||
.flatMap(List::stream)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return source
|
||||
.getResult()
|
||||
.getInstance()
|
||||
.getInstances()
|
||||
.stream()
|
||||
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
|
||||
.map(ConversionUtils::oafInstanceToBrokerInstances)
|
||||
.flatMap(List::stream)
|
||||
.filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS))
|
||||
.filter(i -> !urls.contains(i.getUrl()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
|
|
@ -5,38 +5,37 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.broker.objects.Pid;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.TypedValue;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMorePid extends UpdateMatcher<Pid> {
|
||||
public class EnrichMorePid extends UpdateMatcher<TypedValue> {
|
||||
|
||||
public EnrichMorePid() {
|
||||
super(true,
|
||||
pid -> Topic.ENRICH_MORE_PID,
|
||||
(p, pid) -> p.getPids().add(pid),
|
||||
pid -> pid.getType() + "::" + pid.getValue());
|
||||
pid -> pidAsString(pid));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<Pid> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
protected List<TypedValue> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
final Set<String> existingPids = target
|
||||
.getResult()
|
||||
.getPid()
|
||||
.getPids()
|
||||
.stream()
|
||||
.map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue())
|
||||
.map(pid -> pidAsString(pid))
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return source
|
||||
.getResult()
|
||||
.getPid()
|
||||
.getPids()
|
||||
.stream()
|
||||
.filter(pid -> !existingPids.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
|
||||
.map(ConversionUtils::oafPidToBrokerPid)
|
||||
.filter(pid -> !existingPids.contains(pidAsString(pid)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private static String pidAsString(final TypedValue pid) {
|
||||
return pid.getType() + "::" + pid.getValue();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,39 +5,37 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.TypedValue;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
|
||||
public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
|
||||
public class EnrichMoreSubject extends UpdateMatcher<TypedValue> {
|
||||
|
||||
public EnrichMoreSubject() {
|
||||
super(true,
|
||||
pair -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + pair.getLeft()),
|
||||
(p, pair) -> p.getSubjects().add(pair.getRight()),
|
||||
pair -> pair.getLeft() + "::" + pair.getRight());
|
||||
s -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + s.getType()),
|
||||
(p, s) -> p.getSubjects().add(s),
|
||||
s -> subjectAsString(s));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<Pair<String, String>> findDifferences(final ResultWithRelations source,
|
||||
final ResultWithRelations target) {
|
||||
protected List<TypedValue> findDifferences(final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target) {
|
||||
final Set<String> existingSubjects = target
|
||||
.getResult()
|
||||
.getSubject()
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue())
|
||||
.map(pid -> subjectAsString(pid))
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return source
|
||||
.getResult()
|
||||
.getPid()
|
||||
.getPids()
|
||||
.stream()
|
||||
.filter(pid -> !existingSubjects.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
|
||||
.map(ConversionUtils::oafSubjectToPair)
|
||||
.filter(s -> !existingSubjects.contains(subjectAsString(s)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private static String subjectAsString(final TypedValue s) {
|
||||
return s.getType() + "::" + s.getValue();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,13 +2,12 @@
|
|||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.broker.model.Event;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
|
||||
public class BrokerConstants {
|
||||
|
||||
|
@ -18,7 +17,11 @@ public class BrokerConstants {
|
|||
public static final float MIN_TRUST = 0.25f;
|
||||
public static final float MAX_TRUST = 1.00f;
|
||||
|
||||
public static final List<Class<? extends Result>> RESULT_CLASSES = Arrays
|
||||
.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class);
|
||||
public static Class<?>[] getModelClasses() {
|
||||
final Set<Class<?>> list = new HashSet<>();
|
||||
list.addAll(Arrays.asList(ModelSupport.getOafModelClasses()));
|
||||
list.addAll(Arrays.asList(ResultGroup.class, Event.class));
|
||||
return list.toArray(new Class[] {});
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -13,7 +13,8 @@ import org.dom4j.DocumentHelper;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.broker.objects.Pid;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.TypedValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
|
||||
|
@ -41,8 +42,8 @@ public class ConversionUtils {
|
|||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static Pid oafPidToBrokerPid(final StructuredProperty sp) {
|
||||
return sp != null ? new Pid()
|
||||
public static TypedValue oafPidToBrokerPid(final StructuredProperty sp) {
|
||||
return sp != null ? new TypedValue()
|
||||
.setValue(sp.getValue())
|
||||
.setType(sp.getQualifier().getClassid()) : null;
|
||||
}
|
||||
|
@ -54,7 +55,7 @@ public class ConversionUtils {
|
|||
public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) {
|
||||
return d != null ? new eu.dnetlib.broker.objects.Dataset()
|
||||
.setOriginalId(d.getOriginalId().get(0))
|
||||
.setTitles(structPropList(d.getTitle()))
|
||||
.setTitle(structPropValue(d.getTitle()))
|
||||
.setPids(d.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
|
||||
.setInstances(
|
||||
d
|
||||
|
@ -63,26 +64,30 @@ public class ConversionUtils {
|
|||
.map(ConversionUtils::oafInstanceToBrokerInstances)
|
||||
.flatMap(List::stream)
|
||||
.collect(Collectors.toList()))
|
||||
.setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
|
||||
.setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).findFirst().orElse(null))
|
||||
: null;
|
||||
}
|
||||
|
||||
public static final eu.dnetlib.broker.objects.Publication oafResultToBrokerPublication(final Result result) {
|
||||
public static final OpenaireBrokerResult oafResultToBrokerResult(final Result result) {
|
||||
|
||||
return result != null ? new eu.dnetlib.broker.objects.Publication()
|
||||
return result != null ? new OpenaireBrokerResult()
|
||||
.setOpenaireId(result.getId())
|
||||
.setOriginalId(result.getOriginalId().get(0))
|
||||
.setTypology(result.getResulttype().getClassid())
|
||||
.setTitles(structPropList(result.getTitle()))
|
||||
.setAbstracts(fieldList(result.getDescription()))
|
||||
.setLanguage(result.getLanguage().getClassid())
|
||||
.setSubjects(structPropList(result.getSubject()))
|
||||
.setCreators(result.getAuthor().stream().map(Author::getFullname).collect(Collectors.toList()))
|
||||
.setPublicationdate(result.getDateofcollection())
|
||||
.setSubjects(structPropTypedList(result.getSubject()))
|
||||
.setCreators(
|
||||
result.getAuthor().stream().map(ConversionUtils::oafAuthorToBrokerAuthor).collect(Collectors.toList()))
|
||||
.setPublicationdate(result.getDateofacceptance().getValue())
|
||||
.setPublisher(fieldValue(result.getPublisher()))
|
||||
.setEmbargoenddate(fieldValue(result.getEmbargoenddate()))
|
||||
.setContributor(fieldList(result.getContributor()))
|
||||
.setJournal(
|
||||
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null)
|
||||
.setCollectedFrom(result.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
|
||||
.setCollectedFromId(result.getCollectedfrom().stream().map(KeyValue::getKey).findFirst().orElse(null))
|
||||
.setCollectedFromName(result.getCollectedfrom().stream().map(KeyValue::getValue).findFirst().orElse(null))
|
||||
.setPids(result.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
|
||||
.setInstances(
|
||||
result
|
||||
|
@ -100,6 +105,30 @@ public class ConversionUtils {
|
|||
: null;
|
||||
}
|
||||
|
||||
private static List<TypedValue> structPropTypedList(final List<StructuredProperty> list) {
|
||||
return list
|
||||
.stream()
|
||||
.map(
|
||||
p -> new TypedValue()
|
||||
.setValue(p.getValue())
|
||||
.setType(p.getQualifier().getClassid()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private static eu.dnetlib.broker.objects.Author oafAuthorToBrokerAuthor(final Author author) {
|
||||
return author != null ? new eu.dnetlib.broker.objects.Author()
|
||||
.setFullname(author.getFullname())
|
||||
.setOrcid(
|
||||
author
|
||||
.getPid()
|
||||
.stream()
|
||||
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid"))
|
||||
.map(pid -> pid.getValue())
|
||||
.findFirst()
|
||||
.orElse(null))
|
||||
: null;
|
||||
}
|
||||
|
||||
private static eu.dnetlib.broker.objects.Journal oafJournalToBrokerJournal(final Journal journal) {
|
||||
return journal != null ? new eu.dnetlib.broker.objects.Journal()
|
||||
.setName(journal.getName())
|
||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.broker.oa.util;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.broker.model.EventFactory;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsReferencedBy;
|
||||
|
@ -30,7 +31,6 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess;
|
|||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
|
||||
public class EventFinder {
|
||||
|
@ -68,7 +68,7 @@ public class EventFinder {
|
|||
public static EventGroup generateEvents(final ResultGroup results, final DedupConfig dedupConfig) {
|
||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||
|
||||
for (final ResultWithRelations target : results.getData()) {
|
||||
for (final OpenaireBrokerResult target : results.getData()) {
|
||||
for (final UpdateMatcher<?> matcher : matchers) {
|
||||
list.addAll(matcher.searchUpdatesForRecord(target, results.getData(), dedupConfig));
|
||||
}
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.Function;
|
||||
|
||||
|
@ -10,14 +9,11 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.broker.objects.Instance;
|
||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.broker.objects.Provenance;
|
||||
import eu.dnetlib.broker.objects.Publication;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.tree.support.TreeProcessor;
|
||||
|
@ -29,11 +25,11 @@ public final class UpdateInfo<T> {
|
|||
|
||||
private final T highlightValue;
|
||||
|
||||
private final ResultWithRelations source;
|
||||
private final OpenaireBrokerResult source;
|
||||
|
||||
private final ResultWithRelations target;
|
||||
private final OpenaireBrokerResult target;
|
||||
|
||||
private final BiConsumer<Publication, T> compileHighlight;
|
||||
private final BiConsumer<OpenaireBrokerResult, T> compileHighlight;
|
||||
|
||||
private final Function<T, String> highlightToString;
|
||||
|
||||
|
@ -41,9 +37,9 @@ public final class UpdateInfo<T> {
|
|||
|
||||
private static final Logger log = LoggerFactory.getLogger(UpdateInfo.class);
|
||||
|
||||
public UpdateInfo(final Topic topic, final T highlightValue, final ResultWithRelations source,
|
||||
final ResultWithRelations target,
|
||||
final BiConsumer<Publication, T> compileHighlight,
|
||||
public UpdateInfo(final Topic topic, final T highlightValue, final OpenaireBrokerResult source,
|
||||
final OpenaireBrokerResult target,
|
||||
final BiConsumer<OpenaireBrokerResult, T> compileHighlight,
|
||||
final Function<T, String> highlightToString,
|
||||
final DedupConfig dedupConfig) {
|
||||
this.topic = topic;
|
||||
|
@ -52,22 +48,23 @@ public final class UpdateInfo<T> {
|
|||
this.target = target;
|
||||
this.compileHighlight = compileHighlight;
|
||||
this.highlightToString = highlightToString;
|
||||
this.trust = calculateTrust(dedupConfig, source.getResult(), target.getResult());
|
||||
this.trust = calculateTrust(dedupConfig, source, target);
|
||||
}
|
||||
|
||||
public T getHighlightValue() {
|
||||
return highlightValue;
|
||||
}
|
||||
|
||||
public ResultWithRelations getSource() {
|
||||
public OpenaireBrokerResult getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public ResultWithRelations getTarget() {
|
||||
public OpenaireBrokerResult getTarget() {
|
||||
return target;
|
||||
}
|
||||
|
||||
private float calculateTrust(final DedupConfig dedupConfig, final Result r1, final Result r2) {
|
||||
private float calculateTrust(final DedupConfig dedupConfig, final OpenaireBrokerResult r1,
|
||||
final OpenaireBrokerResult r2) {
|
||||
try {
|
||||
final ObjectMapper objectMapper = new ObjectMapper();
|
||||
final MapDocument doc1 = MapDocumentUtil
|
||||
|
@ -103,26 +100,18 @@ public final class UpdateInfo<T> {
|
|||
|
||||
public OpenAireEventPayload asBrokerPayload() {
|
||||
|
||||
final Publication p = ConversionUtils.oafResultToBrokerPublication(getSource().getResult());
|
||||
compileHighlight.accept(p, getHighlightValue());
|
||||
compileHighlight.accept(target, getHighlightValue());
|
||||
|
||||
final Publication hl = new Publication();
|
||||
final OpenaireBrokerResult hl = new OpenaireBrokerResult();
|
||||
compileHighlight.accept(hl, getHighlightValue());
|
||||
|
||||
final String provId = getSource().getResult().getOriginalId().stream().findFirst().orElse(null);
|
||||
final String provRepo = getSource()
|
||||
.getResult()
|
||||
.getCollectedfrom()
|
||||
.stream()
|
||||
.map(KeyValue::getValue)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
final String provId = getSource().getOriginalId();
|
||||
final String provRepo = getSource().getCollectedFromName();
|
||||
|
||||
final String provUrl = getSource()
|
||||
.getResult()
|
||||
.getInstance()
|
||||
.getInstances()
|
||||
.stream()
|
||||
.map(Instance::getUrl)
|
||||
.flatMap(List::stream)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
;
|
||||
|
@ -130,7 +119,7 @@ public final class UpdateInfo<T> {
|
|||
final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl);
|
||||
|
||||
return new OpenAireEventPayload()
|
||||
.setPublication(p)
|
||||
.setPublication(target)
|
||||
.setHighlight(hl)
|
||||
.setTrust(trust)
|
||||
.setProvenance(provenance);
|
||||
|
|
|
@ -5,11 +5,11 @@ import org.apache.spark.sql.Encoder;
|
|||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.expressions.Aggregator;
|
||||
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class ResultAggregator extends Aggregator<Tuple2<ResultWithRelations, Relation>, ResultGroup, ResultGroup> {
|
||||
public class ResultAggregator extends Aggregator<Tuple2<OpenaireBrokerResult, Relation>, ResultGroup, ResultGroup> {
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -22,7 +22,7 @@ public class ResultAggregator extends Aggregator<Tuple2<ResultWithRelations, Rel
|
|||
}
|
||||
|
||||
@Override
|
||||
public ResultGroup reduce(final ResultGroup group, final Tuple2<ResultWithRelations, Relation> t) {
|
||||
public ResultGroup reduce(final ResultGroup group, final Tuple2<OpenaireBrokerResult, Relation> t) {
|
||||
return group.addElement(t._1);
|
||||
}
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ import java.io.Serializable;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
|
||||
public class ResultGroup implements Serializable {
|
||||
|
||||
|
@ -14,13 +14,13 @@ public class ResultGroup implements Serializable {
|
|||
*/
|
||||
private static final long serialVersionUID = -3360828477088669296L;
|
||||
|
||||
private final List<ResultWithRelations> data = new ArrayList<>();
|
||||
private final List<OpenaireBrokerResult> data = new ArrayList<>();
|
||||
|
||||
public List<ResultWithRelations> getData() {
|
||||
public List<OpenaireBrokerResult> getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
public ResultGroup addElement(final ResultWithRelations elem) {
|
||||
public ResultGroup addElement(final OpenaireBrokerResult elem) {
|
||||
data.add(elem);
|
||||
return this;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
||||
|
||||
import org.apache.spark.sql.Encoder;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.expressions.Aggregator;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class OpenaireBrokerResultAggregator<T>
|
||||
extends Aggregator<Tuple2<OpenaireBrokerResult, T>, OpenaireBrokerResult, OpenaireBrokerResult> {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -3687878788861013488L;
|
||||
|
||||
@Override
|
||||
public OpenaireBrokerResult zero() {
|
||||
return new OpenaireBrokerResult();
|
||||
}
|
||||
|
||||
@Override
|
||||
public OpenaireBrokerResult finish(final OpenaireBrokerResult g) {
|
||||
return g;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OpenaireBrokerResult reduce(final OpenaireBrokerResult g, final Tuple2<OpenaireBrokerResult, T> t) {
|
||||
if (g.getOriginalId() == null) {
|
||||
return t._1;
|
||||
} else if (t._2 instanceof RelatedSoftware) {
|
||||
g.getSoftwares().add(((RelatedSoftware) t._2).getRelSoftware());
|
||||
} else if (t._2 instanceof RelatedDataset) {
|
||||
g.getDatasets().add(((RelatedDataset) t._2).getRelDataset());
|
||||
} else if (t._2 instanceof RelatedPublication) {
|
||||
g.getPublications().add(((RelatedPublication) t._2).getRelPublication());
|
||||
} else if (t._2 instanceof RelatedProject) {
|
||||
g.getProjects().add(((RelatedProject) t._2).getRelProject());
|
||||
}
|
||||
return g;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public OpenaireBrokerResult merge(final OpenaireBrokerResult g1, final OpenaireBrokerResult g2) {
|
||||
if (g1.getOriginalId() != null) {
|
||||
g1.getSoftwares().addAll(g2.getSoftwares());
|
||||
g1.getDatasets().addAll(g2.getDatasets());
|
||||
g1.getPublications().addAll(g2.getPublications());
|
||||
g1.getProjects().addAll(g2.getProjects());
|
||||
return g1;
|
||||
} else {
|
||||
return g2;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Encoder<OpenaireBrokerResult> bufferEncoder() {
|
||||
return Encoders.kryo(OpenaireBrokerResult.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Encoder<OpenaireBrokerResult> outputEncoder() {
|
||||
return Encoders.kryo(OpenaireBrokerResult.class);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,10 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import java.io.Serializable;
|
||||
|
||||
public class RelatedDataset {
|
||||
import eu.dnetlib.broker.objects.Dataset;
|
||||
|
||||
public class RelatedDataset implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = 774487705184038324L;
|
||||
private final String source;
|
||||
private final String relType;
|
||||
private final Dataset relDataset;
|
||||
|
|
|
@ -1,15 +1,17 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.broker.objects.Dataset;
|
||||
import eu.dnetlib.broker.objects.Project;
|
||||
import eu.dnetlib.broker.objects.Publication;
|
||||
import eu.dnetlib.broker.objects.Software;
|
||||
|
||||
public class RelatedEntityFactory {
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static <RT, T> RT newRelatedEntity(final String sourceId, final String relType, final T target,
|
||||
public static <RT, T> RT newRelatedEntity(final String sourceId,
|
||||
final String relType,
|
||||
final T target,
|
||||
final Class<RT> clazz) {
|
||||
if (clazz == RelatedProject.class) {
|
||||
return (RT) new RelatedProject(sourceId, relType, (Project) target);
|
||||
|
|
|
@ -1,9 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import java.io.Serializable;
|
||||
|
||||
public class RelatedProject {
|
||||
import eu.dnetlib.broker.objects.Project;
|
||||
|
||||
public class RelatedProject implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = 4941437626549329870L;
|
||||
|
||||
private final String source;
|
||||
private final String relType;
|
||||
|
|
|
@ -1,9 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import java.io.Serializable;
|
||||
|
||||
public class RelatedPublication {
|
||||
import eu.dnetlib.broker.objects.Publication;
|
||||
|
||||
public class RelatedPublication implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = 9021609640411395128L;
|
||||
|
||||
private final String source;
|
||||
private final String relType;
|
||||
|
|
|
@ -1,10 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import java.io.Serializable;
|
||||
|
||||
public class RelatedSoftware {
|
||||
import eu.dnetlib.broker.objects.Software;
|
||||
|
||||
public class RelatedSoftware implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = 7573383356943300157L;
|
||||
private final String source;
|
||||
private final String relType;
|
||||
private final Software relSoftware;
|
||||
|
|
|
@ -1,55 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class ResultWithRelations implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -1368401915974311571L;
|
||||
|
||||
private Result result;
|
||||
|
||||
private final List<RelatedDataset> datasets = new ArrayList<>();
|
||||
private final List<RelatedPublication> publications = new ArrayList<>();
|
||||
private final List<RelatedSoftware> softwares = new ArrayList<>();
|
||||
private final List<RelatedProject> projects = new ArrayList<>();
|
||||
|
||||
public ResultWithRelations() {
|
||||
}
|
||||
|
||||
public ResultWithRelations(final Result result) {
|
||||
this.result = result;
|
||||
}
|
||||
|
||||
public Result getResult() {
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<RelatedDataset> getDatasets() {
|
||||
return datasets;
|
||||
}
|
||||
|
||||
public List<RelatedPublication> getPublications() {
|
||||
return publications;
|
||||
}
|
||||
|
||||
public List<RelatedSoftware> getSoftwares() {
|
||||
return softwares;
|
||||
}
|
||||
|
||||
public List<RelatedProject> getProjects() {
|
||||
return projects;
|
||||
}
|
||||
|
||||
public void setResult(final Result result) {
|
||||
this.result = result;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,68 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
||||
|
||||
import org.apache.spark.sql.Encoder;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.expressions.Aggregator;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
public class ResultWithRelationsAggregator<T>
|
||||
extends Aggregator<Tuple2<ResultWithRelations, T>, ResultWithRelations, ResultWithRelations> {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -3687878788861013488L;
|
||||
|
||||
@Override
|
||||
public ResultWithRelations zero() {
|
||||
return new ResultWithRelations();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultWithRelations finish(final ResultWithRelations g) {
|
||||
return g;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultWithRelations reduce(final ResultWithRelations g, final Tuple2<ResultWithRelations, T> t) {
|
||||
if (g.getResult() == null) {
|
||||
return t._1;
|
||||
} else if (t._2 instanceof RelatedSoftware) {
|
||||
g.getSoftwares().add((RelatedSoftware) t._2);
|
||||
} else if (t._2 instanceof RelatedDataset) {
|
||||
g.getDatasets().add((RelatedDataset) t._2);
|
||||
} else if (t._2 instanceof RelatedPublication) {
|
||||
g.getPublications().add((RelatedPublication) t._2);
|
||||
} else if (t._2 instanceof RelatedProject) {
|
||||
g.getProjects().add((RelatedProject) t._2);
|
||||
}
|
||||
return g;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultWithRelations merge(final ResultWithRelations g1, final ResultWithRelations g2) {
|
||||
if (g1.getResult() != null) {
|
||||
g1.getSoftwares().addAll(g2.getSoftwares());
|
||||
g1.getDatasets().addAll(g2.getDatasets());
|
||||
g1.getPublications().addAll(g2.getPublications());
|
||||
g1.getProjects().addAll(g2.getProjects());
|
||||
return g1;
|
||||
} else {
|
||||
return g2;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Encoder<ResultWithRelations> bufferEncoder() {
|
||||
return Encoders.kryo(ResultWithRelations.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Encoder<ResultWithRelations> outputEncoder() {
|
||||
return Encoders.kryo(ResultWithRelations.class);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,200 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.reflections;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class ReflectionTest {
|
||||
|
||||
private final Cleaner cleaner = new Cleaner();
|
||||
|
||||
@Test
|
||||
void testObject() throws Exception {
|
||||
final Publication pub = new Publication();
|
||||
pub.setTitle("openaire guidelines");
|
||||
pub.getAuthors().add(new Author("Michele Artini", new Prop("aa-001", "orcid")));
|
||||
pub.getAuthors().add(new Author("Claudio Atzori", new Prop("aa-002", "orcid")));
|
||||
pub.getAuthors().add(new Author("Alessia Bardi", new Prop("aa-003", "orcid")));
|
||||
pub.getSubjects().add(new Prop("infrastructures", "keyword"));
|
||||
pub.getSubjects().add(new Prop("digital libraries", "keyword"));
|
||||
|
||||
cleaner.clean(pub);
|
||||
|
||||
System.out.println(pub);
|
||||
|
||||
assertEquals("OPENAIRE GUIDELINES", pub.getTitle());
|
||||
|
||||
assertEquals("MICHELE ARTINI", pub.getAuthors().get(0).getName());
|
||||
assertEquals("CLAUDIO ATZORI", pub.getAuthors().get(1).getName());
|
||||
assertEquals("ALESSIA BARDI", pub.getAuthors().get(2).getName());
|
||||
|
||||
assertEquals("dnet:aa-001", pub.getAuthors().get(0).getId().getId());
|
||||
assertEquals("dnet:aa-002", pub.getAuthors().get(1).getId().getId());
|
||||
assertEquals("dnet:aa-003", pub.getAuthors().get(2).getId().getId());
|
||||
assertEquals("dnet:orcid", pub.getAuthors().get(0).getId().getName());
|
||||
assertEquals("dnet:orcid", pub.getAuthors().get(1).getId().getName());
|
||||
assertEquals("dnet:orcid", pub.getAuthors().get(2).getId().getName());
|
||||
|
||||
assertEquals("dnet:infrastructures", pub.getSubjects().get(0).getId());
|
||||
assertEquals("dnet:keyword", pub.getSubjects().get(0).getName());
|
||||
assertEquals("dnet:digital libraries", pub.getSubjects().get(1).getId());
|
||||
assertEquals("dnet:keyword", pub.getSubjects().get(1).getName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class Cleaner {
|
||||
|
||||
public void clean(final Object o) throws IllegalArgumentException, IllegalAccessException {
|
||||
if (isPrimitive(o)) {
|
||||
return;
|
||||
} else if (isIterable(o.getClass())) {
|
||||
for (final Object elem : (Iterable<?>) o) {
|
||||
clean(elem);
|
||||
}
|
||||
} else if (hasMapping(o)) {
|
||||
mapObject(o);
|
||||
} else {
|
||||
for (final Field f : o.getClass().getDeclaredFields()) {
|
||||
f.setAccessible(true);
|
||||
final Object val = f.get(o);
|
||||
if (isPrimitive(val)) {
|
||||
f.set(o, cleanValue(f.get(o)));
|
||||
} else if (hasMapping(val)) {
|
||||
mapObject(val);
|
||||
} else {
|
||||
clean(f.get(o));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean hasMapping(final Object o) {
|
||||
return o.getClass() == Prop.class;
|
||||
}
|
||||
|
||||
private void mapObject(final Object o) {
|
||||
if (o.getClass() == Prop.class) {
|
||||
((Prop) o).setId("dnet:" + ((Prop) o).getId());
|
||||
((Prop) o).setName("dnet:" + ((Prop) o).getName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Object cleanValue(final Object o) {
|
||||
if (o.getClass() == String.class) {
|
||||
return ((String) o).toUpperCase();
|
||||
} else {
|
||||
return o;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private boolean isIterable(final Class<?> cl) {
|
||||
return Iterable.class.isAssignableFrom(cl);
|
||||
}
|
||||
|
||||
private boolean isPrimitive(final Object o) {
|
||||
return o.getClass() == String.class;
|
||||
}
|
||||
}
|
||||
|
||||
class Publication {
|
||||
|
||||
private String title;
|
||||
private final List<Author> authors = new ArrayList<>();
|
||||
private final List<Prop> subjects = new ArrayList<>();
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(final String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public List<Author> getAuthors() {
|
||||
return authors;
|
||||
}
|
||||
|
||||
public List<Prop> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Publication [title=%s, authors=%s, subjects=%s]", title, authors, subjects);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class Prop {
|
||||
|
||||
private String id;
|
||||
private String name;
|
||||
|
||||
public Prop(final String id, final String name) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(final String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Prop [id=%s, name=%s]", id, name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class Author {
|
||||
|
||||
private String name;
|
||||
private Prop id;
|
||||
|
||||
public Author(final String name, final Prop id) {
|
||||
this.name = name;
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(final String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public Prop getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final Prop id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Author [name=%s, id=%s]", name, id);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue