enrichment steps #38

Merged
claudio.atzori merged 334 commits from miriam.baglioni/dnet-hadoop:master into enrichment_wfs 2020-08-11 16:40:26 +02:00
5 changed files with 18 additions and 11 deletions
Showing only changes of commit d839e88783 - Show all commits

View File

@@ -33,7 +33,7 @@ public class EventFactory {
final Map<String, Object> map = createMapFromResult(updateInfo);
final String eventId = calculateEventId(
updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId(), updateInfo.getHighlightValueAsString());
updateInfo.getTopicPath(), updateInfo.getTarget().getOpenaireId(), updateInfo.getHighlightValueAsString());
res.setEventId(eventId);
res.setProducerId(PRODUCER_ID);
@@ -55,7 +55,7 @@ public class EventFactory {
map.put("target_datasource_id", target.getCollectedFromId());
map.put("target_datasource_name", target.getCollectedFromName());
map.put("target_publication_id", target.getOriginalId());
map.put("target_publication_id", target.getOpenaireId());
final List<String> titles = target.getTitles();
if (titles.size() > 0) {
@@ -74,7 +74,7 @@ public class EventFactory {
map.put("trust", updateInfo.getTrust());
map.put("provenance_datasource_id", source.getCollectedFromId());
map.put("provenance_datasource_name", source.getCollectedFromName());
map.put("provenance_publication_id_list", source.getOriginalId());
map.put("provenance_publication_id_list", source.getOpenaireId());
return map;
}

View File

@@ -43,17 +43,19 @@ public abstract class UpdateMatcher<T> {
if (source != res) {
for (final T hl : findDifferences(source, res)) {
final Topic topic = getTopicFunction().apply(hl);
final UpdateInfo<T> info = new UpdateInfo<>(topic, hl, source, res, getCompileHighlightFunction(),
if (topic != null) {
final UpdateInfo<T> info = new UpdateInfo<>(topic, hl, source, res,
getCompileHighlightFunction(),
getHighlightToStringFunction(), dedupConfig);
final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {
} else {
infoMap.put(s, info);
}
}
}
}
}
final Collection<UpdateInfo<T>> values = infoMap.values();

View File

@@ -22,6 +22,7 @@ public class EnrichMoreSubject extends UpdateMatcher<OaBrokerTypedValue> {
@Override
protected List<OaBrokerTypedValue> findDifferences(final OaBrokerMainEntity source,
final OaBrokerMainEntity target) {
final Set<String> existingSubjects = target
.getSubjects()
.stream()

View File

@@ -14,12 +14,16 @@ public class EventGroup implements Serializable {
*/
private static final long serialVersionUID = 765977943803533130L;
private final List<Event> data = new ArrayList<>();
private List<Event> data = new ArrayList<>();
/**
 * Returns the list of events held by this group.
 * The internal list itself is returned, not a copy.
 *
 * @return the {@code data} list
 */
public List<Event> getData() {
return data;
}
/**
 * Replaces the list of events held by this group with the given one.
 * The reference is stored as-is; no copy is made.
 *
 * @param data the list to store in the {@code data} field
 */
public void setData(final List<Event> data) {
this.data = data;
}
public EventGroup addElement(final Event elem) {
data.add(elem);
return this;

View File

@@ -111,7 +111,7 @@ public final class UpdateInfo<T> {
final OaBrokerMainEntity hl = new OaBrokerMainEntity();
compileHighlight.accept(hl, getHighlightValue());
final String provId = getSource().getOriginalId();
final String provId = getSource().getOpenaireId();
final String provRepo = getSource().getCollectedFromName();
final String provUrl = getSource()