enrichment steps #38
|
@ -6,7 +6,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-build</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>dhp-build-assembly-resources</artifactId>
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-build</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>dhp-build-properties-maven-plugin</artifactId>
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-code-style</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
|
||||
<packaging>jar</packaging>
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<artifactId>dhp-build</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
<relativePath>../</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||
package eu.dnetlib.dhp.common;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.Normalizer;
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
<relativePath>../</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.common;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
|
||||
public class LicenseComparator implements Comparator<Qualifier> {
|
||||
|
||||
@Override
|
||||
public int compare(Qualifier left, Qualifier right) {
|
||||
|
||||
if (left == null && right == null)
|
||||
return 0;
|
||||
if (left == null)
|
||||
return 1;
|
||||
if (right == null)
|
||||
return -1;
|
||||
|
||||
String lClass = left.getClassid();
|
||||
String rClass = right.getClassid();
|
||||
|
||||
if (lClass.equals(rClass))
|
||||
return 0;
|
||||
|
||||
if (lClass.equals("OPEN SOURCE"))
|
||||
return -1;
|
||||
if (rClass.equals("OPEN SOURCE"))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals("OPEN"))
|
||||
return -1;
|
||||
if (rClass.equals("OPEN"))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals("6MONTHS"))
|
||||
return -1;
|
||||
if (rClass.equals("6MONTHS"))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals("12MONTHS"))
|
||||
return -1;
|
||||
if (rClass.equals("12MONTHS"))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals("EMBARGO"))
|
||||
return -1;
|
||||
if (rClass.equals("EMBARGO"))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals("RESTRICTED"))
|
||||
return -1;
|
||||
if (rClass.equals("RESTRICTED"))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals("CLOSED"))
|
||||
return -1;
|
||||
if (rClass.equals("CLOSED"))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals("UNKNOWN"))
|
||||
return -1;
|
||||
if (rClass.equals("UNKNOWN"))
|
||||
return 1;
|
||||
|
||||
// Else (but unlikely), lexicographical ordering will do.
|
||||
return lClass.compareTo(rClass);
|
||||
}
|
||||
}
|
|
@ -8,7 +8,7 @@ public class DataInfo implements Serializable {
|
|||
|
||||
private Boolean invisible = false;
|
||||
private Boolean inferred;
|
||||
private Boolean deletedbyinference;
|
||||
private Boolean deletedbyinference = false;
|
||||
private String trust;
|
||||
private String inferenceprovenance;
|
||||
private Qualifier provenanceaction;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
package eu.dnetlib.dhp.schema.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
public class Field<T> implements Serializable {
|
||||
|
||||
|
@ -39,6 +40,6 @@ public class Field<T> implements Serializable {
|
|||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
Field<T> other = (Field<T>) obj;
|
||||
return getValue().equals(other.getValue());
|
||||
return Objects.equals(getValue(), other.getValue());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -106,6 +106,7 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
|||
.stream(lists)
|
||||
.filter(Objects::nonNull)
|
||||
.flatMap(List::stream)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
|
|
@ -244,7 +244,25 @@ public class Result extends OafEntity implements Serializable {
|
|||
|
||||
subject = mergeLists(subject, r.getSubject());
|
||||
|
||||
// merge title lists: main title with higher trust and distinct between the others
|
||||
StructuredProperty baseMainTitle = null;
|
||||
if (title != null) {
|
||||
baseMainTitle = getMainTitle(title);
|
||||
title.remove(baseMainTitle);
|
||||
}
|
||||
|
||||
StructuredProperty newMainTitle = null;
|
||||
if (r.getTitle() != null) {
|
||||
newMainTitle = getMainTitle(r.getTitle());
|
||||
r.getTitle().remove(newMainTitle);
|
||||
}
|
||||
|
||||
if (newMainTitle != null && compareTrust(this, r) < 0)
|
||||
baseMainTitle = newMainTitle;
|
||||
|
||||
title = mergeLists(title, r.getTitle());
|
||||
if (title != null && baseMainTitle != null)
|
||||
title.add(baseMainTitle);
|
||||
|
||||
relevantdate = mergeLists(relevantdate, r.getRelevantdate());
|
||||
|
||||
|
@ -294,4 +312,15 @@ public class Result extends OafEntity implements Serializable {
|
|||
}
|
||||
return a.size() > b.size() ? a : b;
|
||||
}
|
||||
|
||||
private StructuredProperty getMainTitle(List<StructuredProperty> titles) {
|
||||
// need to check if the list of titles contains more than 1 main title? (in that case, we should chose which
|
||||
// main title select in the list)
|
||||
for (StructuredProperty title : titles) {
|
||||
if (title.getQualifier() != null && title.getQualifier().getClassid() != null)
|
||||
if (title.getQualifier().getClassid().equals("main title"))
|
||||
return title;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<artifactId>dhp-actionmanager</artifactId>
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<artifactId>dhp-aggregation</artifactId>
|
||||
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -61,12 +61,6 @@ public class BlackListTest {
|
|||
spark.stop();
|
||||
}
|
||||
|
||||
/*
|
||||
* String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); final String outputPath =
|
||||
* parser.get("outputPath"); log.info("outputPath {}: ", outputPath); final String blacklistPath =
|
||||
* parser.get("hdfsPath"); log.info("blacklistPath {}: ", blacklistPath); final String mergesPath =
|
||||
* parser.get("mergesPath"); log.info("mergesPath {}: ", mergesPath);
|
||||
*/
|
||||
@Test
|
||||
public void noRemoveTest() throws Exception {
|
||||
SparkRemoveBlacklistedRelationJob
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
@ -57,7 +57,7 @@
|
|||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaire-broker-common</artifactId>
|
||||
<version>[1.0.0,2.0.0)</version>
|
||||
<version>[2.0.0,3.0.0)</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
|
|
@ -30,30 +30,30 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.broker.model.Event;
|
||||
import eu.dnetlib.dhp.broker.model.EventFactory;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsReferencedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsRelatedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetReferences;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsReferencedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsRelatedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationReferences;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreProject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsReferencedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsRelatedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetIsSupplementedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets.EnrichMissingDatasetReferences;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMissingProject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedProjects.EnrichMoreProject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsReferencedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsRelatedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationReferences;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetIsReferencedBy
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetIsReferencedBy() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetIsRelatedTo
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetIsRelatedTo() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetIsSupplementedBy
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetIsSupplementedBy() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetIsSupplementedTo
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetIsSupplementedTo() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetReferences
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetReferences() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_REFERENCES,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationIsReferencedBy
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationIsReferencedBy() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationIsRelatedTo
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationIsRelatedTo() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationIsSupplementedBy
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationIsSupplementedBy() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationIsSupplementedTo
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationIsSupplementedTo() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationReferences
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationReferences() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_REFERENCES,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public abstract class AbstractEnrichMissingDataset
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
private final Topic topic;
|
||||
|
||||
public AbstractEnrichMissingDataset(final Topic topic) {
|
||||
super(true);
|
||||
this.topic = topic;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
|
||||
final Set<String> existingDatasets = target
|
||||
.getRight()
|
||||
.stream()
|
||||
.map(Dataset::getId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return source
|
||||
.getRight()
|
||||
.stream()
|
||||
.filter(d -> !existingDatasets.contains(d.getId()))
|
||||
.map(ConversionUtils::oafDatasetToBrokerDataset)
|
||||
.map(i -> generateUpdateInfo(i, source, target))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
getTopic(),
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
|
||||
public Topic getTopic() {
|
||||
return topic;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset {
|
||||
|
||||
public EnrichMissingDatasetIsReferencedBy() {
|
||||
super(Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset {
|
||||
|
||||
public EnrichMissingDatasetIsRelatedTo() {
|
||||
super(Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset {
|
||||
|
||||
public EnrichMissingDatasetIsSupplementedBy() {
|
||||
super(Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
/**
 * {@link AbstractEnrichMissingDataset} bound to {@link Topic#ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO}.
 * Declares nothing besides the constructor that selects the topic.
 */
public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset {
|
||||
|
||||
/** Passes {@code Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO} to the superclass constructor. */
public EnrichMissingDatasetIsSupplementedTo() {
|
||||
super(Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
/**
 * {@link AbstractEnrichMissingDataset} bound to {@link Topic#ENRICH_MISSING_DATASET_REFERENCES}.
 * Declares nothing besides the constructor that selects the topic.
 */
public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset {
|
||||
|
||||
/** Passes {@code Topic.ENRICH_MISSING_DATASET_REFERENCES} to the superclass constructor. */
public EnrichMissingDatasetReferences() {
|
||||
super(Topic.ENRICH_MISSING_DATASET_REFERENCES);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -7,6 +7,7 @@ import java.util.List;
|
|||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -7,6 +7,7 @@ import java.util.List;
|
|||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
@ -0,0 +1,63 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public abstract class AbstractEnrichMissingPublication
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
private final Topic topic;
|
||||
|
||||
public AbstractEnrichMissingPublication(final Topic topic) {
|
||||
super(true);
|
||||
this.topic = topic;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
|
||||
final Set<String> existingPublications = target
|
||||
.getRight()
|
||||
.stream()
|
||||
.map(Publication::getId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return source
|
||||
.getRight()
|
||||
.stream()
|
||||
.filter(d -> !existingPublications.contains(d.getId()))
|
||||
.map(ConversionUtils::oafPublicationToBrokerPublication)
|
||||
.map(i -> generateUpdateInfo(i, source, target))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
getTopic(),
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getPublications().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
|
||||
public Topic getTopic() {
|
||||
return topic;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
/**
 * {@link AbstractEnrichMissingPublication} bound to {@link Topic#ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY}.
 * Declares nothing besides the constructor that selects the topic.
 */
public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication {
|
||||
|
||||
/** Passes {@code Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY} to the superclass constructor. */
public EnrichMissingPublicationIsReferencedBy() {
|
||||
super(Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
/**
 * {@link AbstractEnrichMissingPublication} bound to {@link Topic#ENRICH_MISSING_PUBLICATION_IS_RELATED_TO}.
 * Declares nothing besides the constructor that selects the topic.
 */
public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication {
|
||||
|
||||
/** Passes {@code Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO} to the superclass constructor. */
public EnrichMissingPublicationIsRelatedTo() {
|
||||
super(Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
/**
 * {@link AbstractEnrichMissingPublication} bound to {@link Topic#ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY}.
 * Declares nothing besides the constructor that selects the topic.
 */
public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication {
|
||||
|
||||
/** Passes {@code Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY} to the superclass constructor. */
public EnrichMissingPublicationIsSupplementedBy() {
|
||||
super(Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
/**
 * {@link AbstractEnrichMissingPublication} bound to {@link Topic#ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO}.
 * Declares nothing besides the constructor that selects the topic.
 */
public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication {
|
||||
|
||||
/** Passes {@code Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO} to the superclass constructor. */
public EnrichMissingPublicationIsSupplementedTo() {
|
||||
super(Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
|
||||
/**
 * {@link AbstractEnrichMissingPublication} bound to {@link Topic#ENRICH_MISSING_PUBLICATION_REFERENCES}.
 * Declares nothing besides the constructor that selects the topic.
 */
public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication {
|
||||
|
||||
/** Passes {@code Topic.ENRICH_MISSING_PUBLICATION_REFERENCES} to the superclass constructor. */
public EnrichMissingPublicationReferences() {
|
||||
super(Topic.ENRICH_MISSING_PUBLICATION_REFERENCES);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,11 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -7,6 +7,7 @@ import java.util.List;
|
|||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -7,6 +7,7 @@ import java.util.stream.Collectors;
|
|||
|
||||
import eu.dnetlib.broker.objects.Instance;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -7,6 +7,7 @@ import java.util.stream.Collectors;
|
|||
|
||||
import eu.dnetlib.broker.objects.Pid;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
@ -1,11 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -7,6 +7,7 @@ import java.util.List;
|
|||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
@ -8,6 +8,7 @@ import java.util.stream.Collectors;
|
|||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
@ -7,6 +7,7 @@ import java.util.stream.Collectors;
|
|||
|
||||
import eu.dnetlib.broker.objects.Instance;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
@ -7,6 +7,7 @@ import java.util.stream.Collectors;
|
|||
|
||||
import eu.dnetlib.broker.objects.Pid;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -7,6 +7,7 @@ import java.util.List;
|
|||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
@ -8,6 +8,7 @@ import java.util.stream.Collectors;
|
|||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
@ -7,6 +7,8 @@ import org.apache.commons.lang3.tuple.Pair;
|
|||
|
||||
import eu.dnetlib.broker.objects.Instance;
|
||||
import eu.dnetlib.broker.objects.Pid;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class ConversionUtils {
|
||||
|
@ -33,4 +35,15 @@ public class ConversionUtils {
|
|||
return Pair.of(sp.getQualifier().getClassid(), sp.getValue());
|
||||
}
|
||||
|
||||
public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) {
|
||||
final eu.dnetlib.broker.objects.Dataset res = new eu.dnetlib.broker.objects.Dataset();
|
||||
// TODO
|
||||
return res;
|
||||
}
|
||||
|
||||
public static final eu.dnetlib.broker.objects.Publication oafPublicationToBrokerPublication(final Publication d) {
|
||||
final eu.dnetlib.broker.objects.Publication res = new eu.dnetlib.broker.objects.Publication();
|
||||
// TODO
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>dhp-dedup-openaire</artifactId>
|
||||
|
|
|
@ -20,12 +20,7 @@ public class AuthorMerger {
|
|||
|
||||
public static List<Author> merge(List<List<Author>> authors) {
|
||||
|
||||
authors.sort(new Comparator<List<Author>>() {
|
||||
@Override
|
||||
public int compare(List<Author> o1, List<Author> o2) {
|
||||
return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2));
|
||||
}
|
||||
});
|
||||
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
|
||||
|
||||
List<Author> author = new ArrayList<>();
|
||||
|
||||
|
@ -86,20 +81,30 @@ public class AuthorMerger {
|
|||
.stream()
|
||||
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
|
||||
.max(Comparator.comparing(Tuple2::_1));
|
||||
if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) {
|
||||
|
||||
if (simAuthor.isPresent()) {
|
||||
double th = THRESHOLD;
|
||||
// increase the threshold if the surname is too short
|
||||
if (simAuthor.get()._2().getSurname() != null
|
||||
&& simAuthor.get()._2().getSurname().length() <= 3)
|
||||
th = 0.99;
|
||||
|
||||
if (simAuthor.get()._1() > th) {
|
||||
Author r = simAuthor.get()._2();
|
||||
if (r.getPid() == null) {
|
||||
r.setPid(new ArrayList<>());
|
||||
}
|
||||
r.getPid().add(a._1());
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static String pidToComparableString(StructuredProperty pid) {
|
||||
return (pid.getQualifier() != null
|
||||
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
||||
: "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
||||
: "")
|
||||
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
||||
}
|
||||
|
||||
public static int countAuthorsPids(List<Author> authors) {
|
||||
|
@ -120,9 +125,10 @@ public class AuthorMerger {
|
|||
final Person pa = parse(a);
|
||||
final Person pb = parse(b);
|
||||
|
||||
// if both are accurate (e.g. they have name and surname)
|
||||
if (pa.isAccurate() & pb.isAccurate()) {
|
||||
return new JaroWinkler()
|
||||
.score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString()));
|
||||
return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
|
||||
+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
|
||||
} else {
|
||||
return new JaroWinkler()
|
||||
.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
|
||||
|
|
|
@ -21,6 +21,7 @@ import scala.Tuple2;
|
|||
public class EntityMergerTest implements Serializable {
|
||||
|
||||
List<Tuple2<String, Publication>> publications;
|
||||
List<Tuple2<String, Publication>> publications2;
|
||||
|
||||
String testEntityBasePath;
|
||||
DataInfo dataInfo;
|
||||
|
@ -36,6 +37,7 @@ public class EntityMergerTest implements Serializable {
|
|||
.getAbsolutePath();
|
||||
|
||||
publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
|
||||
publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class);
|
||||
|
||||
pub_top = getTopPub(publications);
|
||||
|
||||
|
@ -88,6 +90,25 @@ public class EntityMergerTest implements Serializable {
|
|||
// verify authors
|
||||
assertEquals(pub_merged.getAuthor().size(), 9);
|
||||
assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4);
|
||||
|
||||
// verify title
|
||||
int count = 0;
|
||||
for (StructuredProperty title : pub_merged.getTitle()) {
|
||||
if (title.getQualifier().getClassid().equals("main title"))
|
||||
count++;
|
||||
}
|
||||
assertEquals(count, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void publicationMergerTest2() throws InstantiationException, IllegalAccessException {
|
||||
|
||||
Publication pub_merged = DedupRecordFactory
|
||||
.entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class);
|
||||
|
||||
assertEquals(pub_merged.getAuthor().size(), 27);
|
||||
// insert assertions here
|
||||
|
||||
}
|
||||
|
||||
public DataInfo setDI() {
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag;
|
|||
import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
@ -100,6 +101,7 @@ public class SparkBulkTagJob {
|
|||
|
||||
ResultTagger resultTagger = new ResultTagger();
|
||||
readPath(spark, inputPath, resultClazz)
|
||||
.map(patchResult(), Encoders.bean(resultClazz))
|
||||
.map(
|
||||
(MapFunction<R, R>) value -> resultTagger
|
||||
.enrichContextCriteria(
|
||||
|
@ -119,4 +121,17 @@ public class SparkBulkTagJob {
|
|||
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||
}
|
||||
|
||||
// TODO remove this hack as soon as the values fixed by this method will be provided as NON null
|
||||
private static <R extends Result> MapFunction<R, R> patchResult() {
|
||||
return (MapFunction<R, R>) r -> {
|
||||
if (r.getDataInfo().getDeletedbyinference() == null) {
|
||||
r.getDataInfo().setDeletedbyinference(false);
|
||||
}
|
||||
if (r.getContext() == null) {
|
||||
r.setContext(new ArrayList<>());
|
||||
}
|
||||
return r;
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -131,7 +131,7 @@ public class CommunityConfiguration implements Serializable {
|
|||
p -> {
|
||||
if (p.getSnd() == null)
|
||||
return p.getFst();
|
||||
if (((SelectionConstraints) p.getSnd()).verifyCriteria(param))
|
||||
if (p.getSnd().verifyCriteria(param))
|
||||
return p.getFst();
|
||||
else
|
||||
return null;
|
||||
|
|
|
@ -34,7 +34,7 @@ public class VerbResolver implements Serializable {
|
|||
.collect(
|
||||
Collectors
|
||||
.toMap(
|
||||
value -> (String) ((ClassInfo) value)
|
||||
value -> (String) value
|
||||
.getAnnotationInfo()
|
||||
.get(0)
|
||||
.getParameterValues()
|
||||
|
|
|
@ -77,9 +77,15 @@ public class PrepareDatasourceCountryAssociation {
|
|||
List<String> allowedtypes,
|
||||
String inputPath,
|
||||
String outputPath) {
|
||||
String whitelisted = "";
|
||||
for (String i : whitelist) {
|
||||
whitelisted += " OR id = '" + i + "'";
|
||||
String whitelisted = " d.id = '" + whitelist.get(0) + "'";
|
||||
for (int i = 1; i < whitelist.size(); i++) {
|
||||
whitelisted += " OR d.id = '" + whitelist.get(i) + "'";
|
||||
}
|
||||
|
||||
String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'";
|
||||
|
||||
for (int i = 1; i < allowedtypes.size(); i++) {
|
||||
allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'";
|
||||
}
|
||||
|
||||
Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
|
||||
|
@ -90,26 +96,39 @@ public class PrepareDatasourceCountryAssociation {
|
|||
relation.createOrReplaceTempView("relation");
|
||||
organization.createOrReplaceTempView("organization");
|
||||
|
||||
String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
|
||||
+ "FROM ( SELECT id "
|
||||
+ " FROM datasource "
|
||||
+ " WHERE (datainfo.deletedbyinference = false "
|
||||
+ whitelisted
|
||||
+ ") "
|
||||
+ getConstraintList("datasourcetype.classid = '", allowedtypes)
|
||||
+ ") d "
|
||||
+ "JOIN ( SELECT source, target "
|
||||
+ " FROM relation "
|
||||
+ " WHERE relclass = '"
|
||||
+ ModelConstants.IS_PROVIDED_BY
|
||||
+ "' "
|
||||
+ " AND datainfo.deletedbyinference = false ) rel "
|
||||
+ "ON d.id = rel.source "
|
||||
+ "JOIN (SELECT id, country "
|
||||
+ " FROM organization "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ " AND length(country.classid) > 0) o "
|
||||
+ "ON o.id = rel.target";
|
||||
// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
|
||||
// + "FROM ( SELECT id "
|
||||
// + " FROM datasource "
|
||||
// + " WHERE (datainfo.deletedbyinference = false "
|
||||
// + whitelisted
|
||||
// + ") "
|
||||
// + getConstraintList("datasourcetype.classid = '", allowedtypes)
|
||||
// + ") d "
|
||||
// + "JOIN ( SELECT source, target "
|
||||
// + " FROM relation "
|
||||
// + " WHERE relclass = '"
|
||||
// + ModelConstants.IS_PROVIDED_BY
|
||||
// + "' "
|
||||
// + " AND datainfo.deletedbyinference = false ) rel "
|
||||
// + "ON d.id = rel.source "
|
||||
// + "JOIN (SELECT id, country "
|
||||
// + " FROM organization "
|
||||
// + " WHERE datainfo.deletedbyinference = false "
|
||||
// + " AND length(country.classid) > 0) o "
|
||||
// + "ON o.id = rel.target";
|
||||
|
||||
String query = "SELECT source dataSourceId, " +
|
||||
"named_struct('classid', country.classid, 'classname', country.classname) country " +
|
||||
"FROM datasource d " +
|
||||
"JOIN relation rel " +
|
||||
"ON d.id = rel.source " +
|
||||
"JOIN organization o " +
|
||||
"ON o.id = rel.target " +
|
||||
"WHERE rel.datainfo.deletedbyinference = false " +
|
||||
"and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" +
|
||||
"and o.datainfo.deletedbyinference = false " +
|
||||
"and length(o.country.classid) > 0 " +
|
||||
"and (" + allowed + " or " + whitelisted + ")";
|
||||
|
||||
spark
|
||||
.sql(query)
|
||||
|
|
|
@ -4,7 +4,12 @@ package eu.dnetlib.dhp.countrypropagation;
|
|||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
|
@ -13,6 +18,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class PrepareResultCountrySet {
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class);
|
||||
|
@ -60,6 +66,7 @@ public class PrepareResultCountrySet {
|
|||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
getPotentialResultToUpdate(
|
||||
spark,
|
||||
inputPath,
|
||||
|
@ -89,10 +96,33 @@ public class PrepareResultCountrySet {
|
|||
spark
|
||||
.sql(RESULT_COUNTRYSET_QUERY)
|
||||
.as(Encoders.bean(ResultCountrySet.class))
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Append)
|
||||
.json(outputPath);
|
||||
.toJavaRDD()
|
||||
.mapToPair(value -> new Tuple2<>(value.getResultId(), value))
|
||||
.reduceByKey((a, b) -> {
|
||||
ArrayList<CountrySbs> countryList = a.getCountrySet();
|
||||
Set<String> countryCodes = countryList
|
||||
.stream()
|
||||
.map(country -> country.getClassid())
|
||||
.collect(Collectors.toSet());
|
||||
b
|
||||
.getCountrySet()
|
||||
.stream()
|
||||
.forEach(c -> {
|
||||
if (!countryCodes.contains(c.getClassid())) {
|
||||
countryList.add(c);
|
||||
countryCodes.add(c.getClassid());
|
||||
}
|
||||
|
||||
});
|
||||
a.setCountrySet(countryList);
|
||||
return a;
|
||||
})
|
||||
.map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2()))
|
||||
.saveAsTextFile(outputPath, GzipCodec.class);
|
||||
// .write()
|
||||
// .option("compression", "gzip")
|
||||
// .mode(SaveMode.Append)
|
||||
// .json(outputPath);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import com.google.common.collect.Lists;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.PacePerson;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
@ -121,30 +122,39 @@ public class SparkOrcidToResultFromSemRelJob {
|
|||
}
|
||||
|
||||
private static void enrichAuthor(Author a, List<AutoritativeAuthor> au) {
|
||||
PacePerson pp = new PacePerson(a.getFullname(), false);
|
||||
for (AutoritativeAuthor aa : au) {
|
||||
if (enrichAuthor(aa, a)) {
|
||||
if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
|
||||
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author,
|
||||
String author_name,
|
||||
String author_surname) {
|
||||
boolean toaddpid = false;
|
||||
|
||||
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
|
||||
if (StringUtils.isNotEmpty(author.getSurname())) {
|
||||
author_surname = author.getSurname();
|
||||
}
|
||||
if (StringUtils.isNotEmpty(author_surname)) {
|
||||
if (autoritative_author
|
||||
.getSurname()
|
||||
.trim()
|
||||
.equalsIgnoreCase(author.getSurname().trim())) {
|
||||
.equalsIgnoreCase(author_surname.trim())) {
|
||||
|
||||
// have the same surname. Check the name
|
||||
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
|
||||
if (StringUtils.isNotEmpty(author.getName())) {
|
||||
author_name = author.getName();
|
||||
}
|
||||
if (StringUtils.isNotEmpty(author_name)) {
|
||||
if (autoritative_author
|
||||
.getName()
|
||||
.trim()
|
||||
.equalsIgnoreCase(author.getName().trim())) {
|
||||
.equalsIgnoreCase(author_name.trim())) {
|
||||
toaddpid = true;
|
||||
}
|
||||
// they could be differently written (i.e. only the initials of the name
|
||||
|
@ -154,7 +164,7 @@ public class SparkOrcidToResultFromSemRelJob {
|
|||
.getName()
|
||||
.trim()
|
||||
.substring(0, 0)
|
||||
.equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
|
||||
.equalsIgnoreCase(author_name.trim().substring(0, 0))) {
|
||||
toaddpid = true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -105,11 +105,7 @@ public class SparkResultToProjectThroughSemRelJob {
|
|||
.stream()
|
||||
.forEach(
|
||||
(p -> {
|
||||
if (potential_update
|
||||
.getProjectSet()
|
||||
.contains(p)) {
|
||||
potential_update.getProjectSet().remove(p);
|
||||
}
|
||||
}));
|
||||
}
|
||||
String resId = potential_update.getResultId();
|
||||
|
|
|
@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
|||
import java.util.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
|
@ -19,6 +20,7 @@ import com.google.gson.Gson;
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class PrepareResultCommunitySet {
|
||||
|
||||
|
@ -93,10 +95,24 @@ public class PrepareResultCommunitySet {
|
|||
result_organizationset
|
||||
.map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class))
|
||||
.filter(Objects::nonNull)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
.toJavaRDD()
|
||||
.mapToPair(value -> new Tuple2<>(value.getResultId(), value))
|
||||
.reduceByKey((a, b) -> {
|
||||
ArrayList<String> cl = a.getCommunityList();
|
||||
b.getCommunityList().stream().forEach(s -> {
|
||||
if (!cl.contains(s)) {
|
||||
cl.add(s);
|
||||
}
|
||||
});
|
||||
a.setCommunityList(cl);
|
||||
return a;
|
||||
})
|
||||
.map(value -> OBJECT_MAPPER.writeValueAsString(value._2()))
|
||||
.saveAsTextFile(outputPath, GzipCodec.class);
|
||||
// .write()
|
||||
// .mode(SaveMode.Overwrite)
|
||||
// .option("compression", "gzip")
|
||||
// .json(outputPath);
|
||||
}
|
||||
|
||||
private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
|
||||
|
|
|
@ -136,9 +136,7 @@ public class SparkResultToOrganizationFromIstRepoJob {
|
|||
.stream()
|
||||
.forEach(
|
||||
rId -> {
|
||||
if (organization_list.contains(rId)) {
|
||||
organization_list.remove(rId);
|
||||
}
|
||||
});
|
||||
}
|
||||
String resultId = potential_update.getResultId();
|
||||
|
|
|
@ -99,6 +99,7 @@ public class ResultToOrganizationJobTest {
|
|||
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
|
||||
|
||||
Assertions.assertEquals(0, tmp.count());
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -171,6 +172,7 @@ public class ResultToOrganizationJobTest {
|
|||
+ "(target = '20|opendoar____::124266ebc4ece2934eb80edfda3f2091' "
|
||||
+ "or target = '20|dedup_wf_001::5168917a6aeeea55269daeac1af2ecd2')")
|
||||
.count());
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -266,5 +268,6 @@ public class ResultToOrganizationJobTest {
|
|||
"relclass = 'isAuthorInstitutionOf' and "
|
||||
+ "substring(source, 1,2) = '20' and substring(target, 1, 2) = '50'")
|
||||
.count());
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -10,23 +10,10 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
|
|||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
|
||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
|
@ -34,6 +21,7 @@ import org.dom4j.DocumentFactory;
|
|||
import org.dom4j.DocumentHelper;
|
||||
import org.dom4j.Node;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
|
@ -285,7 +273,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
r.setCoverage(prepareCoverages(doc, info));
|
||||
r.setContext(prepareContexts(doc, info));
|
||||
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||
r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy));
|
||||
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
||||
r.setInstance(instances);
|
||||
r.setBestaccessright(getBestAccessRights(instances));
|
||||
}
|
||||
|
||||
private List<Context> prepareContexts(final Document doc, final DataInfo info) {
|
||||
|
@ -368,6 +358,34 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
|
||||
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
|
||||
|
||||
protected static Qualifier getBestAccessRights(List<Instance> instanceList) {
|
||||
if (instanceList != null) {
|
||||
final Optional<Qualifier> min = instanceList
|
||||
.stream()
|
||||
.map(i -> i.getAccessright())
|
||||
.min(new LicenseComparator());
|
||||
|
||||
final Qualifier rights = min.isPresent() ? min.get() : new Qualifier();
|
||||
|
||||
if (StringUtils.isBlank(rights.getClassid())) {
|
||||
rights.setClassid(UNKNOWN);
|
||||
}
|
||||
if (StringUtils.isBlank(rights.getClassname())
|
||||
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
||||
rights.setClassname(NOT_AVAILABLE);
|
||||
}
|
||||
if (StringUtils.isBlank(rights.getSchemeid())) {
|
||||
rights.setSchemeid(DNET_ACCESS_MODES);
|
||||
}
|
||||
if (StringUtils.isBlank(rights.getSchemename())) {
|
||||
rights.setSchemename(DNET_ACCESS_MODES);
|
||||
}
|
||||
|
||||
return rights;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private Journal prepareJournal(final Document doc, final DataInfo info) {
|
||||
final Node n = doc.selectSingleNode("//oaf:journal");
|
||||
if (n != null) {
|
||||
|
|
|
@ -23,7 +23,7 @@ import org.dom4j.Node;
|
|||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||
import eu.dnetlib.dhp.common.PacePerson;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.commons.lang3.StringUtils;
|
|||
import org.dom4j.Document;
|
||||
import org.dom4j.Node;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||
import eu.dnetlib.dhp.common.PacePerson;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
|
|
|
@ -85,8 +85,19 @@ public class MappersTest {
|
|||
assertTrue(p.getSubject().size() > 0);
|
||||
assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline()));
|
||||
assertTrue(StringUtils.isNotBlank(p.getJournal().getName()));
|
||||
assertTrue(p.getInstance().size() > 0);
|
||||
|
||||
assertNotNull(p.getInstance());
|
||||
assertTrue(p.getInstance().size() > 0);
|
||||
p
|
||||
.getInstance()
|
||||
.stream()
|
||||
.forEach(i -> {
|
||||
assertNotNull(i.getAccessright());
|
||||
assertEquals("OPEN", i.getAccessright().getClassid());
|
||||
});
|
||||
|
||||
assertNotNull(p.getBestaccessright());
|
||||
assertEquals("OPEN", p.getBestaccessright().getClassid());
|
||||
assertValidId(r1.getSource());
|
||||
assertValidId(r1.getTarget());
|
||||
assertValidId(r2.getSource());
|
||||
|
@ -164,6 +175,16 @@ public class MappersTest {
|
|||
assertTrue(d.getContext().size() > 0);
|
||||
assertTrue(d.getContext().get(0).getId().length() > 0);
|
||||
|
||||
assertNotNull(d.getInstance());
|
||||
assertTrue(d.getInstance().size() > 0);
|
||||
d
|
||||
.getInstance()
|
||||
.stream()
|
||||
.forEach(i -> {
|
||||
assertNotNull(i.getAccessright());
|
||||
assertEquals("OPEN", i.getAccessright().getClassid());
|
||||
});
|
||||
|
||||
assertValidId(r1.getSource());
|
||||
assertValidId(r1.getTarget());
|
||||
assertValidId(r2.getSource());
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -769,7 +769,7 @@ public class XmlRecordFactory implements Serializable {
|
|||
XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
|
||||
}
|
||||
if (o.getLogourl() != null) {
|
||||
metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getLogourl().getValue()));
|
||||
metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue()));
|
||||
}
|
||||
|
||||
if (o.getEclegalbody() != null) {
|
||||
|
@ -801,13 +801,13 @@ public class XmlRecordFactory implements Serializable {
|
|||
.asXmlElement(
|
||||
"echighereducation", o.getEchighereducation().getValue()));
|
||||
}
|
||||
if (o.getEcinternationalorganization() != null) {
|
||||
if (o.getEcinternationalorganizationeurinterests() != null) {
|
||||
metadata
|
||||
.add(
|
||||
XmlSerializationUtils
|
||||
.asXmlElement(
|
||||
"ecinternationalorganizationeurinterests",
|
||||
o.getEcinternationalorganization().getValue()));
|
||||
o.getEcinternationalorganizationeurinterests().getValue()));
|
||||
}
|
||||
if (o.getEcinternationalorganization() != null) {
|
||||
metadata
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
<$name$$if(hasId)$ objidentifier="$id$"$else$$endif$>
|
||||
$metadata:{$it$}$
|
||||
$metadata:{ it | $it$ }$
|
||||
</$name$>
|
File diff suppressed because one or more lines are too long
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>dhp-stats-update</artifactId>
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp</artifactId>
|
||||
<version>1.2.1-SNAPSHOT</version>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
<relativePath>../</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
Loading…
Reference in New Issue