refactoring

This commit is contained in:
Michele Artini 2020-06-16 12:34:13 +02:00
parent c15c8c0ad0
commit 8a4f84f8c0
36 changed files with 640 additions and 491 deletions

View File

@ -61,7 +61,7 @@
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaire-broker-common</artifactId>
<version>[2.0.1,3.0.0)</version>
<version>[3.0.1,4.0.0)</version>
</dependency>
</dependencies>

View File

@ -0,0 +1,37 @@
<?xml version='1.0' encoding='UTF-8'?>
<dfxml xmloutputversion='1.0'>
<metadata
xmlns='http://afflib.org/tcpflow/'
xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'
xmlns:dc='http://purl.org/dc/elements/1.1/'>
<dc:type>Feature Extraction</dc:type>
</metadata>
<creator version='1.0'>
<program>TCPFLOW</program>
<version>1.5.0</version>
<build_environment>
<compiler>4.2.1 (4.2.1 Compatible Apple LLVM 11.0.0 (clang-1100.0.33.8))</compiler>
<CPPFLAGS>-D_THREAD_SAFE -pthread -I/usr/local/include -I/usr/local/include -DUTC_OFFSET=+0000 </CPPFLAGS>
<CFLAGS>-g -D_THREAD_SAFE -pthread -g -O3 -MD -Wpointer-arith -Wmissing-declarations -Wmissing-prototypes -Wshadow -Wwrite-strings -Wcast-align -Waggregate-return -Wbad-function-cast -Wcast-qual -Wundef -Wredundant-decls -Wdisabled-optimization -Wfloat-equal -Wmultichar -Wc++-compat -Wmissing-noreturn -Wall -Wstrict-prototypes -MD -D_FORTIFY_SOURCE=2 -Wpointer-arith -Wmissing-declarations -Wmissing-prototypes -Wshadow -Wwrite-strings -Wcast-align -Waggregate-return -Wbad-function-cast -Wcast-qual -Wundef -Wredundant-decls -Wdisabled-optimization -Wfloat-equal -Wmultichar -Wc++-compat -Wmissing-noreturn -Wall -Wstrict-prototypes</CFLAGS>
<CXXFLAGS>-g -D_THREAD_SAFE -pthread -g -O3 -Wall -MD -D_FORTIFY_SOURCE=2 -Wpointer-arith -Wshadow -Wwrite-strings -Wcast-align -Wredundant-decls -Wdisabled-optimization -Wfloat-equal -Wmultichar -Wmissing-noreturn -Woverloaded-virtual -Wsign-promo -funit-at-a-time -Weffc++ -std=c++11 -Wall -MD -D_FORTIFY_SOURCE=2 -Wpointer-arith -Wshadow -Wwrite-strings -Wcast-align -Wredundant-decls -Wdisabled-optimization -Wfloat-equal -Wmultichar -Wmissing-noreturn -Woverloaded-virtual -Wsign-promo -funit-at-a-time -Weffc++ </CXXFLAGS>
<LDFLAGS>-L/usr/local/lib -L/usr/local/lib </LDFLAGS>
<LIBS>-lpython2.7 -lpython2.7 -lpcap -lbz2 -lexpat -lsqlite3 -lcrypto -lssl -lcrypto -ldl -lz </LIBS>
<compilation_date>2019-10-11T01:16:58</compilation_date>
<library name="boost" version="107100"/>
<library name="sqlite" version="3.28.0" source_id="2019-04-15 14:49:49 378230ae7f4b721c8b8d83c8ceb891449685cd23b1702a57841f1be40b5daapl"/>
</build_environment>
<execution_environment>
<os_sysname>Darwin</os_sysname>
<os_release>19.5.0</os_release>
<os_version>Darwin Kernel Version 19.5.0: Tue May 26 20:41:44 PDT 2020; root:xnu-6153.121.2~2/RELEASE_X86_64</os_version>
<host>Micheles-MBP.local</host>
<arch>x86_64</arch>
<command_line>tcpflow</command_line>
<uid>501</uid>
<username>michele</username>
<start_time>2020-06-15T14:55:03Z</start_time>
</execution_environment>
</creator>
<configuration>
</configuration>
<tdelta>0</tdelta>

View File

@ -1,9 +1,15 @@
package eu.dnetlib.dhp.broker.model;
import java.io.Serializable;
import java.util.Map;
public class Event {
public class Event implements Serializable {
/**
*
*/
private static final long serialVersionUID = -5936790326505781395L;
private String eventId;

View File

@ -6,17 +6,13 @@ import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class EventFactory {
@ -37,8 +33,7 @@ public class EventFactory {
final Map<String, Object> map = createMapFromResult(updateInfo);
final String eventId = calculateEventId(
updateInfo.getTopicPath(), updateInfo.getTarget().getResult().getOriginalId().get(0),
updateInfo.getHighlightValueAsString());
updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId(), updateInfo.getHighlightValueAsString());
res.setEventId(eventId);
res.setProducerId(PRODUCER_ID);
@ -54,53 +49,31 @@ public class EventFactory {
private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) {
final Map<String, Object> map = new HashMap<>();
final Result source = updateInfo.getSource().getResult();
final Result target = updateInfo.getTarget().getResult();
final OpenaireBrokerResult source = updateInfo.getSource();
final OpenaireBrokerResult target = updateInfo.getTarget();
final List<KeyValue> collectedFrom = target.getCollectedfrom();
if (collectedFrom.size() == 1) {
map.put("target_datasource_id", collectedFrom.get(0).getKey());
map.put("target_datasource_name", collectedFrom.get(0).getValue());
}
map.put("target_datasource_id", target.getCollectedFromId());
map.put("target_datasource_name", target.getCollectedFromName());
final List<String> ids = target.getOriginalId();
if (ids.size() > 0) {
map.put("target_publication_id", ids.get(0));
}
map.put("target_publication_id", target.getOriginalId());
final List<StructuredProperty> titles = target.getTitle();
final List<String> titles = target.getTitles();
if (titles.size() > 0) {
map.put("target_publication_title", titles.get(0));
}
final long date = parseDateTolong(target.getDateofacceptance().getValue());
final long date = parseDateTolong(target.getPublicationdate());
if (date > 0) {
map.put("target_dateofacceptance", date);
}
final List<StructuredProperty> subjects = target.getSubject();
if (subjects.size() > 0) {
map
.put(
"target_publication_subject_list",
subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
}
final List<Author> authors = target.getAuthor();
if (authors.size() > 0) {
map
.put(
"target_publication_author_list",
authors.stream().map(Author::getFullname).collect(Collectors.toList()));
}
map.put("target_publication_subject_list", target.getSubjects());
map.put("target_publication_author_list", target.getCreators());
// PROVENANCE INFO
map.put("trust", updateInfo.getTrust());
final List<KeyValue> sourceCollectedFrom = source.getCollectedfrom();
if (sourceCollectedFrom.size() == 1) {
map.put("provenance_datasource_id", sourceCollectedFrom.get(0).getKey());
map.put("provenance_datasource_name", sourceCollectedFrom.get(0).getValue());
}
map.put("provenance_datasource_id", source.getCollectedFromId());
map.put("provenance_datasource_name", source.getCollectedFromName());
map.put("provenance_publication_id_list", source.getOriginalId());
return map;

View File

@ -18,18 +18,20 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.EventFinder;
import eu.dnetlib.dhp.broker.oa.util.EventGroup;
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultAggregator;
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.OpenaireBrokerResultAggregator;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedEntityFactory;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelationsAggregator;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
@ -73,6 +75,8 @@ public class GenerateEventsApplication {
log.info("dedupConfigProfileId: {}", dedupConfigProfileId);
final SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(BrokerConstants.getModelClasses());
final DedupConfig dedupConfig = loadDedupConfig(isLookupUrl, dedupConfigProfileId);
@ -80,13 +84,16 @@ public class GenerateEventsApplication {
removeOutputDir(spark, eventsPath);
final Dataset<Event> all = spark.emptyDataset(Encoders.kryo(Event.class));
for (final Class<? extends Result> r1 : BrokerConstants.RESULT_CLASSES) {
all.union(generateEvents(spark, graphPath, r1, dedupConfig));
}
all.write().mode(SaveMode.Overwrite).option("compression", "gzip").json(eventsPath);
spark
.emptyDataset(Encoders.kryo(Event.class))
.union(generateEvents(spark, graphPath, Publication.class, dedupConfig))
.union(generateEvents(spark, graphPath, eu.dnetlib.dhp.schema.oaf.Dataset.class, dedupConfig))
.union(generateEvents(spark, graphPath, Software.class, dedupConfig))
.union(generateEvents(spark, graphPath, OtherResearchProduct.class, dedupConfig))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(eventsPath);
});
}
@ -101,18 +108,18 @@ public class GenerateEventsApplication {
final Class<SRC> sourceClass,
final DedupConfig dedupConfig) {
final Dataset<ResultWithRelations> results = expandResultsWithRelations(spark, graphPath, sourceClass);
final Dataset<OpenaireBrokerResult> results = expandResultsWithRelations(spark, graphPath, sourceClass);
final Dataset<Relation> mergedRels = readPath(spark, graphPath + "/relation", Relation.class)
.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
final TypedColumn<Tuple2<ResultWithRelations, Relation>, ResultGroup> aggr = new ResultAggregator()
final TypedColumn<Tuple2<OpenaireBrokerResult, Relation>, ResultGroup> aggr = new ResultAggregator()
.toColumn();
return results
.joinWith(mergedRels, results.col("result.id").equalTo(mergedRels.col("source")), "inner")
.groupByKey(
(MapFunction<Tuple2<ResultWithRelations, Relation>, String>) t -> t._2.getTarget(), Encoders.STRING())
(MapFunction<Tuple2<OpenaireBrokerResult, Relation>, String>) t -> t._2.getTarget(), Encoders.STRING())
.agg(aggr)
.map((MapFunction<Tuple2<String, ResultGroup>, ResultGroup>) t -> t._2, Encoders.kryo(ResultGroup.class))
.filter(ResultGroup::isValid)
@ -122,7 +129,7 @@ public class GenerateEventsApplication {
.flatMap(group -> group.getData().iterator(), Encoders.kryo(Event.class));
}
private static <SRC extends Result> Dataset<ResultWithRelations> expandResultsWithRelations(
private static <SRC extends Result> Dataset<OpenaireBrokerResult> expandResultsWithRelations(
final SparkSession spark,
final String graphPath,
final Class<SRC> sourceClass) {
@ -135,14 +142,15 @@ public class GenerateEventsApplication {
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
final Dataset<ResultWithRelations> r0 = readPath(
final Dataset<OpenaireBrokerResult> r0 = readPath(
spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), Result.class)
.filter(r -> r.getDataInfo().getDeletedbyinference())
.map(r -> new ResultWithRelations(r), Encoders.kryo(ResultWithRelations.class));
final Dataset<ResultWithRelations> r1 = join(r0, rels, relatedEntities(projects, rels, RelatedProject.class));
final Dataset<ResultWithRelations> r2 = join(r1, rels, relatedEntities(softwares, rels, RelatedProject.class));
final Dataset<ResultWithRelations> r3 = join(r2, rels, relatedEntities(datasets, rels, RelatedProject.class));
final Dataset<ResultWithRelations> r4 = join(
.map(ConversionUtils::oafResultToBrokerResult, Encoders.kryo(OpenaireBrokerResult.class));
final Dataset<OpenaireBrokerResult> r1 = join(r0, rels, relatedEntities(projects, rels, RelatedProject.class));
final Dataset<OpenaireBrokerResult> r2 = join(r1, rels, relatedEntities(softwares, rels, RelatedProject.class));
final Dataset<OpenaireBrokerResult> r3 = join(r2, rels, relatedEntities(datasets, rels, RelatedProject.class));
final Dataset<OpenaireBrokerResult> r4 = join(
r3, rels, relatedEntities(publications, rels, RelatedProject.class));
;
@ -159,20 +167,20 @@ public class GenerateEventsApplication {
Encoders.kryo(clazz));
}
private static <T> Dataset<ResultWithRelations> join(final Dataset<ResultWithRelations> sources,
private static <T> Dataset<OpenaireBrokerResult> join(final Dataset<OpenaireBrokerResult> sources,
final Dataset<Relation> rels,
final Dataset<T> typedRels) {
final TypedColumn<Tuple2<ResultWithRelations, T>, ResultWithRelations> aggr = new ResultWithRelationsAggregator<T>()
final TypedColumn<Tuple2<OpenaireBrokerResult, T>, OpenaireBrokerResult> aggr = new OpenaireBrokerResultAggregator<T>()
.toColumn();
;
return sources
.joinWith(typedRels, sources.col("result.id").equalTo(rels.col("source")), "left_outer")
.groupByKey(
(MapFunction<Tuple2<ResultWithRelations, T>, String>) t -> t._1.getResult().getId(), Encoders.STRING())
(MapFunction<Tuple2<OpenaireBrokerResult, T>, String>) t -> t._1.getOpenaireId(), Encoders.STRING())
.agg(aggr)
.map(t -> t._2, Encoders.kryo(ResultWithRelations.class));
.map(t -> t._2, Encoders.kryo(OpenaireBrokerResult.class));
}
public static <R> Dataset<R> readPath(

View File

@ -12,22 +12,20 @@ import java.util.function.Function;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.broker.objects.Publication;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.pace.config.DedupConfig;
public abstract class UpdateMatcher<T> {
private final boolean multipleUpdate;
private final Function<T, Topic> topicFunction;
private final BiConsumer<Publication, T> compileHighlightFunction;
private final BiConsumer<OpenaireBrokerResult, T> compileHighlightFunction;
private final Function<T, String> highlightToStringFunction;
public UpdateMatcher(final boolean multipleUpdate, final Function<T, Topic> topicFunction,
final BiConsumer<Publication, T> compileHighlightFunction,
final BiConsumer<OpenaireBrokerResult, T> compileHighlightFunction,
final Function<T, String> highlightToStringFunction) {
this.multipleUpdate = multipleUpdate;
this.topicFunction = topicFunction;
@ -35,19 +33,18 @@ public abstract class UpdateMatcher<T> {
this.highlightToStringFunction = highlightToStringFunction;
}
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final ResultWithRelations res,
final Collection<ResultWithRelations> others,
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final OpenaireBrokerResult res,
final Collection<OpenaireBrokerResult> others,
final DedupConfig dedupConfig) {
final Map<String, UpdateInfo<T>> infoMap = new HashMap<>();
for (final ResultWithRelations source : others) {
for (final OpenaireBrokerResult source : others) {
if (source != res) {
for (final T hl : findDifferences(source, res)) {
final Topic topic = getTopicFunction().apply(hl);
final UpdateInfo<T> info = new UpdateInfo<>(topic, hl, source, res, getCompileHighlightFunction(),
getHighlightToStringFunction(),
dedupConfig);
getHighlightToStringFunction(), dedupConfig);
final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {
} else {
@ -71,14 +68,14 @@ public abstract class UpdateMatcher<T> {
}
}
protected abstract List<T> findDifferences(ResultWithRelations source, ResultWithRelations target);
protected abstract List<T> findDifferences(OpenaireBrokerResult source, OpenaireBrokerResult target);
protected static boolean isMissing(final List<Field<String>> list) {
return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue());
protected static boolean isMissing(final List<String> list) {
return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0));
}
protected boolean isMissing(final Field<String> field) {
return field == null || StringUtils.isBlank(field.getValue());
protected boolean isMissing(final String field) {
return StringUtils.isBlank(field);
}
public boolean isMultipleUpdate() {
@ -89,7 +86,7 @@ public abstract class UpdateMatcher<T> {
return topicFunction;
}
public BiConsumer<Publication, T> getCompileHighlightFunction() {
public BiConsumer<OpenaireBrokerResult, T> getCompileHighlightFunction() {
return compileHighlightFunction;
}

View File

@ -5,45 +5,39 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Dataset;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedDataset;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Dataset;
public abstract class AbstractEnrichMissingDataset
extends UpdateMatcher<eu.dnetlib.broker.objects.Dataset> {
extends UpdateMatcher<Dataset> {
public AbstractEnrichMissingDataset(final Topic topic) {
super(true,
rel -> topic,
(p, rel) -> p.getDatasets().add(rel),
rel -> rel.getInstances().get(0).getUrl());
rel -> rel.getOriginalId());
}
protected abstract boolean filterByType(String relType);
@Override
protected final List<eu.dnetlib.broker.objects.Dataset> findDifferences(
final ResultWithRelations source,
final ResultWithRelations target) {
protected final List<Dataset> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> existingDatasets = target
.getDatasets()
.stream()
.filter(rel -> filterByType(rel.getRelType()))
.map(RelatedDataset::getRelDataset)
.map(Dataset::getId)
.map(Dataset::getOriginalId)
.collect(Collectors.toSet());
return source
.getDatasets()
.stream()
.filter(rel -> filterByType(rel.getRelType()))
.map(RelatedDataset::getRelDataset)
.filter(d -> !existingDatasets.contains(d.getId()))
.map(ConversionUtils::oafDatasetToBrokerDataset)
.filter(d -> !existingDatasets.contains(d.getOriginalId()))
.collect(Collectors.toList());
}

View File

@ -1,19 +1,15 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.Project;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMissingProject
extends UpdateMatcher<eu.dnetlib.broker.objects.Project> {
public class EnrichMissingProject extends UpdateMatcher<Project> {
public EnrichMissingProject() {
super(true,
@ -23,16 +19,11 @@ public class EnrichMissingProject
}
@Override
protected List<Project> findDifferences(final ResultWithRelations source, final ResultWithRelations target) {
if (source.getProjects().isEmpty()) {
return Arrays.asList();
protected List<Project> findDifferences(final OpenaireBrokerResult source, final OpenaireBrokerResult target) {
if (target.getProjects().isEmpty()) {
return source.getProjects();
} else {
return target
.getProjects()
.stream()
.map(RelatedProject::getRelProject)
.map(ConversionUtils::oafProjectToBrokerProject)
.collect(Collectors.toList());
return new ArrayList<>();
}
}
}

View File

@ -5,39 +5,38 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.Project;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Project;
public class EnrichMoreProject extends UpdateMatcher<eu.dnetlib.broker.objects.Project> {
public class EnrichMoreProject extends UpdateMatcher<Project> {
public EnrichMoreProject() {
super(true,
prj -> Topic.ENRICH_MORE_PROJECT,
(p, prj) -> p.getProjects().add(prj),
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
prj -> projectAsString(prj));
}
private static String projectAsString(final Project prj) {
return prj.getFunder() + "::" + prj.getFundingProgram() + "::" + prj.getCode();
}
@Override
protected List<eu.dnetlib.broker.objects.Project> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
protected List<eu.dnetlib.broker.objects.Project> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> existingProjects = source
final Set<String> existingProjects = target
.getProjects()
.stream()
.map(RelatedProject::getRelProject)
.map(Project::getId)
.map(EnrichMoreProject::projectAsString)
.collect(Collectors.toSet());
return target
return source
.getProjects()
.stream()
.map(RelatedProject::getRelProject)
.filter(p -> !existingProjects.contains(p.getId()))
.map(ConversionUtils::oafProjectToBrokerProject)
.filter(p -> !existingProjects.contains(projectAsString(p)))
.collect(Collectors.toList());
}

View File

@ -5,21 +5,18 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.Publication;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedPublication;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Publication;
public abstract class AbstractEnrichMissingPublication
extends UpdateMatcher<eu.dnetlib.broker.objects.Publication> {
public abstract class AbstractEnrichMissingPublication extends UpdateMatcher<Publication> {
public AbstractEnrichMissingPublication(final Topic topic) {
super(true,
rel -> topic,
(p, rel) -> p.getPublications().add(rel),
rel -> rel.getInstances().get(0).getUrl());
rel -> rel.getOriginalId());
}
@ -27,24 +24,21 @@ public abstract class AbstractEnrichMissingPublication
@Override
protected final List<eu.dnetlib.broker.objects.Publication> findDifferences(
final ResultWithRelations source,
final ResultWithRelations target) {
final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> existingPublications = target
.getPublications()
.stream()
.filter(rel -> filterByType(rel.getRelType()))
.map(RelatedPublication::getRelPublication)
.map(Publication::getId)
.map(Publication::getOriginalId)
.collect(Collectors.toSet());
return source
.getPublications()
.stream()
.filter(rel -> filterByType(rel.getRelType()))
.map(RelatedPublication::getRelPublication)
.filter(d -> !existingPublications.contains(d.getId()))
.map(ConversionUtils::oafResultToBrokerPublication)
.filter(p -> !existingPublications.contains(p.getOriginalId()))
.collect(Collectors.toList());
}

View File

@ -1,15 +1,12 @@
package eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMissingSoftware
extends UpdateMatcher<eu.dnetlib.broker.objects.Software> {
@ -23,18 +20,13 @@ public class EnrichMissingSoftware
@Override
protected List<eu.dnetlib.broker.objects.Software> findDifferences(
final ResultWithRelations source,
final ResultWithRelations target) {
final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
if (source.getSoftwares().isEmpty()) {
return Arrays.asList();
if (target.getSoftwares().isEmpty()) {
return source.getSoftwares();
} else {
return target
.getSoftwares()
.stream()
.map(RelatedSoftware::getRelSoftware)
.map(ConversionUtils::oafSoftwareToBrokerSoftware)
.collect(Collectors.toList());
return new ArrayList<>();
}
}

View File

@ -5,15 +5,12 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.Software;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Software;
public class EnrichMoreSoftware
extends UpdateMatcher<eu.dnetlib.broker.objects.Software> {
public class EnrichMoreSoftware extends UpdateMatcher<Software> {
public EnrichMoreSoftware() {
super(true,
@ -24,22 +21,19 @@ public class EnrichMoreSoftware
@Override
protected List<eu.dnetlib.broker.objects.Software> findDifferences(
final ResultWithRelations source,
final ResultWithRelations target) {
final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> existingSoftwares = source
.getSoftwares()
.stream()
.map(RelatedSoftware::getRelSoftware)
.map(Software::getId)
.map(Software::getName)
.collect(Collectors.toSet());
return target
.getSoftwares()
.stream()
.map(RelatedSoftware::getRelSoftware)
.filter(p -> !existingSoftwares.contains(p.getId()))
.map(ConversionUtils::oafSoftwareToBrokerSoftware)
.filter(p -> !existingSoftwares.contains(p.getName()))
.collect(Collectors.toList());
}

View File

@ -5,9 +5,9 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMissingAbstract extends UpdateMatcher<String> {
@ -19,13 +19,12 @@ public class EnrichMissingAbstract extends UpdateMatcher<String> {
}
@Override
protected List<String> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
if (isMissing(target.getResult().getDescription()) && !isMissing(source.getResult().getDescription())) {
return Arrays
.asList(source.getResult().getDescription().get(0).getValue());
}
protected List<String> findDifferences(final OpenaireBrokerResult source, final OpenaireBrokerResult target) {
if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
return Arrays.asList(source.getAbstracts().get(0));
} else {
return new ArrayList<>();
}
}
}

View File

@ -1,53 +1,43 @@
package eu.dnetlib.dhp.broker.oa.matchers.simple;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.broker.objects.Author;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> {
public class EnrichMissingAuthorOrcid extends UpdateMatcher<Author> {
public EnrichMissingAuthorOrcid() {
super(true,
aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID,
(p, aut) -> p.getCreators().add(aut),
aut -> aut);
aut -> aut.getOrcid());
}
@Override
protected List<String> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
protected List<Author> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> existingOrcids = target
.getResult()
.getAuthor()
.getCreators()
.stream()
.map(Author::getPid)
.flatMap(List::stream)
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid"))
.map(pid -> pid.getValue())
.map(Author::getOrcid)
.filter(StringUtils::isNotBlank)
.collect(Collectors.toSet());
final List<String> list = new ArrayList<>();
return source
.getCreators()
.stream()
.filter(a -> StringUtils.isNotBlank(a.getOrcid()))
.filter(a -> !existingOrcids.contains(a.getOrcid()))
.collect(Collectors.toList());
for (final Author author : source.getResult().getAuthor()) {
final String name = author.getFullname();
for (final StructuredProperty pid : author.getPid()) {
if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid")
&& !existingOrcids.contains(pid.getValue())) {
list.add(name + " [ORCID: " + pid.getValue() + "]");
}
}
}
return list;
}
}

View File

@ -6,11 +6,10 @@ import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
@ -22,13 +21,12 @@ public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
}
@Override
protected List<Instance> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
protected List<Instance> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final long count = target
.getResult()
.getInstance()
.getInstances()
.stream()
.map(i -> i.getAccessright().getClassid())
.map(Instance::getLicense)
.filter(right -> right.equals(BrokerConstants.OPEN_ACCESS))
.count();
@ -37,12 +35,9 @@ public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
}
return source
.getResult()
.getInstance()
.getInstances()
.stream()
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
.map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(List::stream)
.filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS))
.collect(Collectors.toList());
}

View File

@ -5,13 +5,12 @@ import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.TypedValue;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMissingPid extends UpdateMatcher<Pid> {
public class EnrichMissingPid extends UpdateMatcher<TypedValue> {
public EnrichMissingPid() {
super(true,
@ -21,19 +20,17 @@ public class EnrichMissingPid extends UpdateMatcher<Pid> {
}
@Override
protected List<Pid> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
final long count = target.getResult().getPid().size();
protected List<TypedValue> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final long count = target.getPids().size();
if (count > 0) {
return Arrays.asList();
}
return source
.getResult()
.getPid()
.getPids()
.stream()
.map(ConversionUtils::oafPidToBrokerPid)
.collect(Collectors.toList());
}

View File

@ -5,9 +5,9 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
@ -19,13 +19,14 @@ public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
}
@Override
protected List<String> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
if (isMissing(target.getResult().getDateofacceptance())
&& !isMissing(source.getResult().getDateofacceptance())) {
return Arrays.asList(source.getResult().getDateofacceptance().getValue());
}
protected List<String> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
if (isMissing(target.getPublicationdate()) && !isMissing(source.getPublicationdate())) {
return Arrays.asList(source.getPublicationdate());
} else {
return new ArrayList<>();
}
}
}

View File

@ -5,42 +5,38 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.TypedValue;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
public class EnrichMissingSubject extends UpdateMatcher<TypedValue> {
public EnrichMissingSubject() {
super(true,
pair -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + pair.getLeft()),
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
s -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + s.getType()),
(p, s) -> p.getSubjects().add(s),
s -> subjectAsString(s));
}
@Override
protected List<Pair<String, String>> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
final Set<String> existingTypes = target
.getResult()
.getSubject()
protected List<TypedValue> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> existingSubject = target
.getSubjects()
.stream()
.map(StructuredProperty::getQualifier)
.map(Qualifier::getClassid)
.map(s -> subjectAsString(s))
.collect(Collectors.toSet());
return source
.getResult()
.getPid()
.getSubjects()
.stream()
.filter(pid -> !existingTypes.contains(pid.getQualifier().getClassid()))
.map(ConversionUtils::oafSubjectToPair)
.filter(s -> !existingSubject.contains(subjectAsString(s)))
.collect(Collectors.toList());
}
private static String subjectAsString(final TypedValue s) {
return s.getType() + "::" + s.getValue();
}
}

View File

@ -6,11 +6,10 @@ import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
@ -22,24 +21,19 @@ public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
}
@Override
protected List<Instance> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
protected List<Instance> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> urls = target
.getResult()
.getInstance()
.getInstances()
.stream()
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
.filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS))
.map(i -> i.getUrl())
.flatMap(List::stream)
.collect(Collectors.toSet());
return source
.getResult()
.getInstance()
.getInstances()
.stream()
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
.map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(List::stream)
.filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS))
.filter(i -> !urls.contains(i.getUrl()))
.collect(Collectors.toList());
}

View File

@ -5,38 +5,37 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.TypedValue;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMorePid extends UpdateMatcher<Pid> {
public class EnrichMorePid extends UpdateMatcher<TypedValue> {
public EnrichMorePid() {
super(true,
pid -> Topic.ENRICH_MORE_PID,
(p, pid) -> p.getPids().add(pid),
pid -> pid.getType() + "::" + pid.getValue());
pid -> pidAsString(pid));
}
@Override
protected List<Pid> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
protected List<TypedValue> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> existingPids = target
.getResult()
.getPid()
.getPids()
.stream()
.map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue())
.map(pid -> pidAsString(pid))
.collect(Collectors.toSet());
return source
.getResult()
.getPid()
.getPids()
.stream()
.filter(pid -> !existingPids.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
.map(ConversionUtils::oafPidToBrokerPid)
.filter(pid -> !existingPids.contains(pidAsString(pid)))
.collect(Collectors.toList());
}
private static String pidAsString(final TypedValue pid) {
return pid.getType() + "::" + pid.getValue();
}
}

View File

@ -5,39 +5,37 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.TypedValue;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
public class EnrichMoreSubject extends UpdateMatcher<TypedValue> {
public EnrichMoreSubject() {
super(true,
pair -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + pair.getLeft()),
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
s -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + s.getType()),
(p, s) -> p.getSubjects().add(s),
s -> subjectAsString(s));
}
@Override
protected List<Pair<String, String>> findDifferences(final ResultWithRelations source,
final ResultWithRelations target) {
protected List<TypedValue> findDifferences(final OpenaireBrokerResult source,
final OpenaireBrokerResult target) {
final Set<String> existingSubjects = target
.getResult()
.getSubject()
.getSubjects()
.stream()
.map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue())
.map(pid -> subjectAsString(pid))
.collect(Collectors.toSet());
return source
.getResult()
.getPid()
.getPids()
.stream()
.filter(pid -> !existingSubjects.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
.map(ConversionUtils::oafSubjectToPair)
.filter(s -> !existingSubjects.contains(subjectAsString(s)))
.collect(Collectors.toList());
}
private static String subjectAsString(final TypedValue s) {
return s.getType() + "::" + s.getValue();
}
}

View File

@ -2,13 +2,12 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;
import java.util.List;
import java.util.HashSet;
import java.util.Set;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
import eu.dnetlib.dhp.schema.common.ModelSupport;
public class BrokerConstants {
@ -18,7 +17,11 @@ public class BrokerConstants {
public static final float MIN_TRUST = 0.25f;
public static final float MAX_TRUST = 1.00f;
public static final List<Class<? extends Result>> RESULT_CLASSES = Arrays
.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class);
public static Class<?>[] getModelClasses() {
final Set<Class<?>> list = new HashSet<>();
list.addAll(Arrays.asList(ModelSupport.getOafModelClasses()));
list.addAll(Arrays.asList(ResultGroup.class, Event.class));
return list.toArray(new Class[] {});
}
}

View File

@ -13,7 +13,8 @@ import org.dom4j.DocumentHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.TypedValue;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
@ -41,8 +42,8 @@ public class ConversionUtils {
}).collect(Collectors.toList());
}
public static Pid oafPidToBrokerPid(final StructuredProperty sp) {
return sp != null ? new Pid()
public static TypedValue oafPidToBrokerPid(final StructuredProperty sp) {
return sp != null ? new TypedValue()
.setValue(sp.getValue())
.setType(sp.getQualifier().getClassid()) : null;
}
@ -54,7 +55,7 @@ public class ConversionUtils {
public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) {
return d != null ? new eu.dnetlib.broker.objects.Dataset()
.setOriginalId(d.getOriginalId().get(0))
.setTitles(structPropList(d.getTitle()))
.setTitle(structPropValue(d.getTitle()))
.setPids(d.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
.setInstances(
d
@ -63,26 +64,30 @@ public class ConversionUtils {
.map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(List::stream)
.collect(Collectors.toList()))
.setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
.setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).findFirst().orElse(null))
: null;
}
public static final eu.dnetlib.broker.objects.Publication oafResultToBrokerPublication(final Result result) {
public static final OpenaireBrokerResult oafResultToBrokerResult(final Result result) {
return result != null ? new eu.dnetlib.broker.objects.Publication()
return result != null ? new OpenaireBrokerResult()
.setOpenaireId(result.getId())
.setOriginalId(result.getOriginalId().get(0))
.setTypology(result.getResulttype().getClassid())
.setTitles(structPropList(result.getTitle()))
.setAbstracts(fieldList(result.getDescription()))
.setLanguage(result.getLanguage().getClassid())
.setSubjects(structPropList(result.getSubject()))
.setCreators(result.getAuthor().stream().map(Author::getFullname).collect(Collectors.toList()))
.setPublicationdate(result.getDateofcollection())
.setSubjects(structPropTypedList(result.getSubject()))
.setCreators(
result.getAuthor().stream().map(ConversionUtils::oafAuthorToBrokerAuthor).collect(Collectors.toList()))
.setPublicationdate(result.getDateofacceptance().getValue())
.setPublisher(fieldValue(result.getPublisher()))
.setEmbargoenddate(fieldValue(result.getEmbargoenddate()))
.setContributor(fieldList(result.getContributor()))
.setJournal(
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null)
.setCollectedFrom(result.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
.setCollectedFromId(result.getCollectedfrom().stream().map(KeyValue::getKey).findFirst().orElse(null))
.setCollectedFromName(result.getCollectedfrom().stream().map(KeyValue::getValue).findFirst().orElse(null))
.setPids(result.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
.setInstances(
result
@ -100,6 +105,30 @@ public class ConversionUtils {
: null;
}
private static List<TypedValue> structPropTypedList(final List<StructuredProperty> list) {
return list
.stream()
.map(
p -> new TypedValue()
.setValue(p.getValue())
.setType(p.getQualifier().getClassid()))
.collect(Collectors.toList());
}
private static eu.dnetlib.broker.objects.Author oafAuthorToBrokerAuthor(final Author author) {
return author != null ? new eu.dnetlib.broker.objects.Author()
.setFullname(author.getFullname())
.setOrcid(
author
.getPid()
.stream()
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase("orcid"))
.map(pid -> pid.getValue())
.findFirst()
.orElse(null))
: null;
}
private static eu.dnetlib.broker.objects.Journal oafJournalToBrokerJournal(final Journal journal) {
return journal != null ? new eu.dnetlib.broker.objects.Journal()
.setName(journal.getName())

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.broker.oa.util;
import java.util.ArrayList;
import java.util.List;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.broker.model.EventFactory;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsReferencedBy;
@ -30,7 +31,6 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid;
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
public class EventFinder {
@ -68,7 +68,7 @@ public class EventFinder {
public static EventGroup generateEvents(final ResultGroup results, final DedupConfig dedupConfig) {
final List<UpdateInfo<?>> list = new ArrayList<>();
for (final ResultWithRelations target : results.getData()) {
for (final OpenaireBrokerResult target : results.getData()) {
for (final UpdateMatcher<?> matcher : matchers) {
list.addAll(matcher.searchUpdatesForRecord(target, results.getData(), dedupConfig));
}

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.Function;
@ -10,14 +9,11 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.broker.objects.Provenance;
import eu.dnetlib.broker.objects.Publication;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.tree.support.TreeProcessor;
@ -29,11 +25,11 @@ public final class UpdateInfo<T> {
private final T highlightValue;
private final ResultWithRelations source;
private final OpenaireBrokerResult source;
private final ResultWithRelations target;
private final OpenaireBrokerResult target;
private final BiConsumer<Publication, T> compileHighlight;
private final BiConsumer<OpenaireBrokerResult, T> compileHighlight;
private final Function<T, String> highlightToString;
@ -41,9 +37,9 @@ public final class UpdateInfo<T> {
private static final Logger log = LoggerFactory.getLogger(UpdateInfo.class);
public UpdateInfo(final Topic topic, final T highlightValue, final ResultWithRelations source,
final ResultWithRelations target,
final BiConsumer<Publication, T> compileHighlight,
public UpdateInfo(final Topic topic, final T highlightValue, final OpenaireBrokerResult source,
final OpenaireBrokerResult target,
final BiConsumer<OpenaireBrokerResult, T> compileHighlight,
final Function<T, String> highlightToString,
final DedupConfig dedupConfig) {
this.topic = topic;
@ -52,22 +48,23 @@ public final class UpdateInfo<T> {
this.target = target;
this.compileHighlight = compileHighlight;
this.highlightToString = highlightToString;
this.trust = calculateTrust(dedupConfig, source.getResult(), target.getResult());
this.trust = calculateTrust(dedupConfig, source, target);
}
public T getHighlightValue() {
return highlightValue;
}
public ResultWithRelations getSource() {
public OpenaireBrokerResult getSource() {
return source;
}
public ResultWithRelations getTarget() {
public OpenaireBrokerResult getTarget() {
return target;
}
private float calculateTrust(final DedupConfig dedupConfig, final Result r1, final Result r2) {
private float calculateTrust(final DedupConfig dedupConfig, final OpenaireBrokerResult r1,
final OpenaireBrokerResult r2) {
try {
final ObjectMapper objectMapper = new ObjectMapper();
final MapDocument doc1 = MapDocumentUtil
@ -103,26 +100,18 @@ public final class UpdateInfo<T> {
public OpenAireEventPayload asBrokerPayload() {
final Publication p = ConversionUtils.oafResultToBrokerPublication(getSource().getResult());
compileHighlight.accept(p, getHighlightValue());
compileHighlight.accept(target, getHighlightValue());
final Publication hl = new Publication();
final OpenaireBrokerResult hl = new OpenaireBrokerResult();
compileHighlight.accept(hl, getHighlightValue());
final String provId = getSource().getResult().getOriginalId().stream().findFirst().orElse(null);
final String provRepo = getSource()
.getResult()
.getCollectedfrom()
.stream()
.map(KeyValue::getValue)
.findFirst()
.orElse(null);
final String provId = getSource().getOriginalId();
final String provRepo = getSource().getCollectedFromName();
final String provUrl = getSource()
.getResult()
.getInstance()
.getInstances()
.stream()
.map(Instance::getUrl)
.flatMap(List::stream)
.findFirst()
.orElse(null);
;
@ -130,7 +119,7 @@ public final class UpdateInfo<T> {
final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl);
return new OpenAireEventPayload()
.setPublication(p)
.setPublication(target)
.setHighlight(hl)
.setTrust(trust)
.setProvenance(provenance);

View File

@ -5,11 +5,11 @@ import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.expressions.Aggregator;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;
public class ResultAggregator extends Aggregator<Tuple2<ResultWithRelations, Relation>, ResultGroup, ResultGroup> {
public class ResultAggregator extends Aggregator<Tuple2<OpenaireBrokerResult, Relation>, ResultGroup, ResultGroup> {
/**
*
@ -22,7 +22,7 @@ public class ResultAggregator extends Aggregator<Tuple2<ResultWithRelations, Rel
}
@Override
public ResultGroup reduce(final ResultGroup group, final Tuple2<ResultWithRelations, Relation> t) {
public ResultGroup reduce(final ResultGroup group, final Tuple2<OpenaireBrokerResult, Relation> t) {
return group.addElement(t._1);
}

View File

@ -5,7 +5,7 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
public class ResultGroup implements Serializable {
@ -14,13 +14,13 @@ public class ResultGroup implements Serializable {
*/
private static final long serialVersionUID = -3360828477088669296L;
private final List<ResultWithRelations> data = new ArrayList<>();
private final List<OpenaireBrokerResult> data = new ArrayList<>();
public List<ResultWithRelations> getData() {
public List<OpenaireBrokerResult> getData() {
return data;
}
public ResultGroup addElement(final ResultWithRelations elem) {
public ResultGroup addElement(final OpenaireBrokerResult elem) {
data.add(elem);
return this;
}

View File

@ -0,0 +1,69 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.expressions.Aggregator;
import eu.dnetlib.broker.objects.OpenaireBrokerResult;
import scala.Tuple2;
public class OpenaireBrokerResultAggregator<T>
extends Aggregator<Tuple2<OpenaireBrokerResult, T>, OpenaireBrokerResult, OpenaireBrokerResult> {
/**
*
*/
private static final long serialVersionUID = -3687878788861013488L;
@Override
public OpenaireBrokerResult zero() {
return new OpenaireBrokerResult();
}
@Override
public OpenaireBrokerResult finish(final OpenaireBrokerResult g) {
return g;
}
@Override
public OpenaireBrokerResult reduce(final OpenaireBrokerResult g, final Tuple2<OpenaireBrokerResult, T> t) {
if (g.getOriginalId() == null) {
return t._1;
} else if (t._2 instanceof RelatedSoftware) {
g.getSoftwares().add(((RelatedSoftware) t._2).getRelSoftware());
} else if (t._2 instanceof RelatedDataset) {
g.getDatasets().add(((RelatedDataset) t._2).getRelDataset());
} else if (t._2 instanceof RelatedPublication) {
g.getPublications().add(((RelatedPublication) t._2).getRelPublication());
} else if (t._2 instanceof RelatedProject) {
g.getProjects().add(((RelatedProject) t._2).getRelProject());
}
return g;
}
@Override
public OpenaireBrokerResult merge(final OpenaireBrokerResult g1, final OpenaireBrokerResult g2) {
if (g1.getOriginalId() != null) {
g1.getSoftwares().addAll(g2.getSoftwares());
g1.getDatasets().addAll(g2.getDatasets());
g1.getPublications().addAll(g2.getPublications());
g1.getProjects().addAll(g2.getProjects());
return g1;
} else {
return g2;
}
}
@Override
public Encoder<OpenaireBrokerResult> bufferEncoder() {
return Encoders.kryo(OpenaireBrokerResult.class);
}
@Override
public Encoder<OpenaireBrokerResult> outputEncoder() {
return Encoders.kryo(OpenaireBrokerResult.class);
}
}

View File

@ -1,10 +1,16 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import java.io.Serializable;
public class RelatedDataset {
import eu.dnetlib.broker.objects.Dataset;
public class RelatedDataset implements Serializable {
/**
*
*/
private static final long serialVersionUID = 774487705184038324L;
private final String source;
private final String relType;
private final Dataset relDataset;

View File

@ -1,15 +1,17 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.broker.objects.Dataset;
import eu.dnetlib.broker.objects.Project;
import eu.dnetlib.broker.objects.Publication;
import eu.dnetlib.broker.objects.Software;
public class RelatedEntityFactory {
@SuppressWarnings("unchecked")
public static <RT, T> RT newRelatedEntity(final String sourceId, final String relType, final T target,
public static <RT, T> RT newRelatedEntity(final String sourceId,
final String relType,
final T target,
final Class<RT> clazz) {
if (clazz == RelatedProject.class) {
return (RT) new RelatedProject(sourceId, relType, (Project) target);

View File

@ -1,9 +1,16 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
import eu.dnetlib.dhp.schema.oaf.Project;
import java.io.Serializable;
public class RelatedProject {
import eu.dnetlib.broker.objects.Project;
public class RelatedProject implements Serializable {
/**
*
*/
private static final long serialVersionUID = 4941437626549329870L;
private final String source;
private final String relType;

View File

@ -1,9 +1,16 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
import eu.dnetlib.dhp.schema.oaf.Publication;
import java.io.Serializable;
public class RelatedPublication {
import eu.dnetlib.broker.objects.Publication;
public class RelatedPublication implements Serializable {
/**
*
*/
private static final long serialVersionUID = 9021609640411395128L;
private final String source;
private final String relType;

View File

@ -1,10 +1,16 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
import eu.dnetlib.dhp.schema.oaf.Software;
import java.io.Serializable;
public class RelatedSoftware {
import eu.dnetlib.broker.objects.Software;
public class RelatedSoftware implements Serializable {
/**
*
*/
private static final long serialVersionUID = 7573383356943300157L;
private final String source;
private final String relType;
private final Software relSoftware;

View File

@ -1,55 +0,0 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.Result;
public class ResultWithRelations implements Serializable {
/**
*
*/
private static final long serialVersionUID = -1368401915974311571L;
private Result result;
private final List<RelatedDataset> datasets = new ArrayList<>();
private final List<RelatedPublication> publications = new ArrayList<>();
private final List<RelatedSoftware> softwares = new ArrayList<>();
private final List<RelatedProject> projects = new ArrayList<>();
public ResultWithRelations() {
}
public ResultWithRelations(final Result result) {
this.result = result;
}
public Result getResult() {
return result;
}
public List<RelatedDataset> getDatasets() {
return datasets;
}
public List<RelatedPublication> getPublications() {
return publications;
}
public List<RelatedSoftware> getSoftwares() {
return softwares;
}
public List<RelatedProject> getProjects() {
return projects;
}
public void setResult(final Result result) {
this.result = result;
}
}

View File

@ -1,68 +0,0 @@
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.expressions.Aggregator;
import scala.Tuple2;
public class ResultWithRelationsAggregator<T>
extends Aggregator<Tuple2<ResultWithRelations, T>, ResultWithRelations, ResultWithRelations> {
/**
*
*/
private static final long serialVersionUID = -3687878788861013488L;
@Override
public ResultWithRelations zero() {
return new ResultWithRelations();
}
@Override
public ResultWithRelations finish(final ResultWithRelations g) {
return g;
}
@Override
public ResultWithRelations reduce(final ResultWithRelations g, final Tuple2<ResultWithRelations, T> t) {
if (g.getResult() == null) {
return t._1;
} else if (t._2 instanceof RelatedSoftware) {
g.getSoftwares().add((RelatedSoftware) t._2);
} else if (t._2 instanceof RelatedDataset) {
g.getDatasets().add((RelatedDataset) t._2);
} else if (t._2 instanceof RelatedPublication) {
g.getPublications().add((RelatedPublication) t._2);
} else if (t._2 instanceof RelatedProject) {
g.getProjects().add((RelatedProject) t._2);
}
return g;
}
@Override
public ResultWithRelations merge(final ResultWithRelations g1, final ResultWithRelations g2) {
if (g1.getResult() != null) {
g1.getSoftwares().addAll(g2.getSoftwares());
g1.getDatasets().addAll(g2.getDatasets());
g1.getPublications().addAll(g2.getPublications());
g1.getProjects().addAll(g2.getProjects());
return g1;
} else {
return g2;
}
}
@Override
public Encoder<ResultWithRelations> bufferEncoder() {
return Encoders.kryo(ResultWithRelations.class);
}
@Override
public Encoder<ResultWithRelations> outputEncoder() {
return Encoders.kryo(ResultWithRelations.class);
}
}

View File

@ -0,0 +1,200 @@
package eu.dnetlib.dhp.oa.graph.reflections;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import org.junit.jupiter.api.Test;
public class ReflectionTest {
private final Cleaner cleaner = new Cleaner();
@Test
void testObject() throws Exception {
final Publication pub = new Publication();
pub.setTitle("openaire guidelines");
pub.getAuthors().add(new Author("Michele Artini", new Prop("aa-001", "orcid")));
pub.getAuthors().add(new Author("Claudio Atzori", new Prop("aa-002", "orcid")));
pub.getAuthors().add(new Author("Alessia Bardi", new Prop("aa-003", "orcid")));
pub.getSubjects().add(new Prop("infrastructures", "keyword"));
pub.getSubjects().add(new Prop("digital libraries", "keyword"));
cleaner.clean(pub);
System.out.println(pub);
assertEquals("OPENAIRE GUIDELINES", pub.getTitle());
assertEquals("MICHELE ARTINI", pub.getAuthors().get(0).getName());
assertEquals("CLAUDIO ATZORI", pub.getAuthors().get(1).getName());
assertEquals("ALESSIA BARDI", pub.getAuthors().get(2).getName());
assertEquals("dnet:aa-001", pub.getAuthors().get(0).getId().getId());
assertEquals("dnet:aa-002", pub.getAuthors().get(1).getId().getId());
assertEquals("dnet:aa-003", pub.getAuthors().get(2).getId().getId());
assertEquals("dnet:orcid", pub.getAuthors().get(0).getId().getName());
assertEquals("dnet:orcid", pub.getAuthors().get(1).getId().getName());
assertEquals("dnet:orcid", pub.getAuthors().get(2).getId().getName());
assertEquals("dnet:infrastructures", pub.getSubjects().get(0).getId());
assertEquals("dnet:keyword", pub.getSubjects().get(0).getName());
assertEquals("dnet:digital libraries", pub.getSubjects().get(1).getId());
assertEquals("dnet:keyword", pub.getSubjects().get(1).getName());
}
}
class Cleaner {
public void clean(final Object o) throws IllegalArgumentException, IllegalAccessException {
if (isPrimitive(o)) {
return;
} else if (isIterable(o.getClass())) {
for (final Object elem : (Iterable<?>) o) {
clean(elem);
}
} else if (hasMapping(o)) {
mapObject(o);
} else {
for (final Field f : o.getClass().getDeclaredFields()) {
f.setAccessible(true);
final Object val = f.get(o);
if (isPrimitive(val)) {
f.set(o, cleanValue(f.get(o)));
} else if (hasMapping(val)) {
mapObject(val);
} else {
clean(f.get(o));
}
}
}
}
private boolean hasMapping(final Object o) {
return o.getClass() == Prop.class;
}
private void mapObject(final Object o) {
if (o.getClass() == Prop.class) {
((Prop) o).setId("dnet:" + ((Prop) o).getId());
((Prop) o).setName("dnet:" + ((Prop) o).getName());
}
}
private Object cleanValue(final Object o) {
if (o.getClass() == String.class) {
return ((String) o).toUpperCase();
} else {
return o;
}
}
private boolean isIterable(final Class<?> cl) {
return Iterable.class.isAssignableFrom(cl);
}
private boolean isPrimitive(final Object o) {
return o.getClass() == String.class;
}
}
class Publication {
private String title;
private final List<Author> authors = new ArrayList<>();
private final List<Prop> subjects = new ArrayList<>();
public String getTitle() {
return title;
}
public void setTitle(final String title) {
this.title = title;
}
public List<Author> getAuthors() {
return authors;
}
public List<Prop> getSubjects() {
return subjects;
}
@Override
public String toString() {
return String.format("Publication [title=%s, authors=%s, subjects=%s]", title, authors, subjects);
}
}
class Prop {
private String id;
private String name;
public Prop(final String id, final String name) {
this.id = id;
this.name = name;
}
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(final String name) {
this.name = name;
}
@Override
public String toString() {
return String.format("Prop [id=%s, name=%s]", id, name);
}
}
class Author {
private String name;
private Prop id;
public Author(final String name, final Prop id) {
this.name = name;
this.id = id;
}
public String getName() {
return name;
}
public void setName(final String name) {
this.name = name;
}
public Prop getId() {
return id;
}
public void setId(final Prop id) {
this.id = id;
}
@Override
public String toString() {
return String.format("Author [name=%s, id=%s]", name, id);
}
}