forked from D-Net/dnet-hadoop
Merge branch 'beta' into dedup_whitelist
This commit is contained in:
commit
7fa60e166e
|
@ -12,6 +12,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|||
import eu.dnetlib.dhp.schema.oaf.AccessRight;
|
||||
import eu.dnetlib.dhp.schema.oaf.Country;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
|
||||
|
||||
|
@ -24,17 +25,31 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
|
|||
CleaningRuleMap mapping = new CleaningRuleMap();
|
||||
mapping.put(Qualifier.class, o -> cleanQualifier(vocabularies, (Qualifier) o));
|
||||
mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o));
|
||||
mapping.put(Country.class, o -> {
|
||||
final Country c = (Country) o;
|
||||
if (StringUtils.isBlank(c.getSchemeid())) {
|
||||
c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
|
||||
c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
|
||||
}
|
||||
cleanQualifier(vocabularies, c);
|
||||
});
|
||||
mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o));
|
||||
mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o));
|
||||
return mapping;
|
||||
}
|
||||
|
||||
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
||||
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) {
|
||||
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType());
|
||||
r.setSubRelType(newValue.getClassid());
|
||||
}
|
||||
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_RELCLASS)) {
|
||||
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_RELCLASS, r.getRelClass());
|
||||
r.setRelClass(newValue.getClassid());
|
||||
}
|
||||
}
|
||||
|
||||
private static void cleanCountry(VocabularyGroup vocabularies, Country o) {
|
||||
final Country c = o;
|
||||
if (StringUtils.isBlank(c.getSchemeid())) {
|
||||
c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE);
|
||||
c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE);
|
||||
}
|
||||
cleanQualifier(vocabularies, c);
|
||||
}
|
||||
|
||||
private static <Q extends Qualifier> void cleanQualifier(VocabularyGroup vocabularies, Q q) {
|
||||
if (vocabularies.vocabularyExists(q.getSchemeid())) {
|
||||
Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid());
|
||||
|
|
|
@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.*;
|
|||
import static org.mockito.Mockito.lenient;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
@ -16,12 +17,12 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
|||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
@ -29,7 +30,8 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|||
@ExtendWith(MockitoExtension.class)
|
||||
public class GraphCleaningFunctionsTest {
|
||||
|
||||
public static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
public static final ObjectMapper MAPPER = new ObjectMapper()
|
||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
|
||||
@Mock
|
||||
private ISLookUpService isLookUpService;
|
||||
|
@ -49,6 +51,23 @@ public class GraphCleaningFunctionsTest {
|
|||
mapping = CleaningRuleMap.create(vocabularies);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCleanRelations() throws Exception {
|
||||
|
||||
List<String> lines = IOUtils
|
||||
.readLines(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/relation.json"));
|
||||
for (String json : lines) {
|
||||
Relation r_in = MAPPER.readValue(json, Relation.class);
|
||||
assertNotNull(r_in);
|
||||
|
||||
assertFalse(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_in.getRelClass()));
|
||||
|
||||
Relation r_out = OafCleaner.apply(r_in, mapping);
|
||||
assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass()));
|
||||
assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType()));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCleaning() throws Exception {
|
||||
|
||||
|
@ -87,7 +106,7 @@ public class GraphCleaningFunctionsTest {
|
|||
p_out
|
||||
.getPid()
|
||||
.stream()
|
||||
.map(p -> p.getQualifier())
|
||||
.map(StructuredProperty::getQualifier)
|
||||
.allMatch(q -> pidTerms.contains(q.getClassid())));
|
||||
|
||||
List<Instance> poi = p_out.getInstance();
|
||||
|
@ -101,8 +120,8 @@ public class GraphCleaningFunctionsTest {
|
|||
assertEquals(2, poii.getPid().size());
|
||||
|
||||
assertTrue(
|
||||
poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent());
|
||||
assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent());
|
||||
poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x")));
|
||||
assertTrue(poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd")));
|
||||
|
||||
assertNotNull(poii.getAlternateIdentifier());
|
||||
assertEquals(2, poii.getAlternateIdentifier().size());
|
||||
|
@ -111,16 +130,12 @@ public class GraphCleaningFunctionsTest {
|
|||
poii
|
||||
.getAlternateIdentifier()
|
||||
.stream()
|
||||
.filter(s -> s.getValue().equals("10.1007/s109090161569x"))
|
||||
.findFirst()
|
||||
.isPresent());
|
||||
.anyMatch(s -> s.getValue().equals("10.1007/s109090161569x")));
|
||||
assertTrue(
|
||||
poii
|
||||
.getAlternateIdentifier()
|
||||
.stream()
|
||||
.filter(s -> s.getValue().equals("10.1009/qwerty"))
|
||||
.findFirst()
|
||||
.isPresent());
|
||||
.anyMatch(s -> s.getValue().equals("10.1009/qwerty")));
|
||||
|
||||
Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out);
|
||||
|
||||
|
@ -142,8 +157,8 @@ public class GraphCleaningFunctionsTest {
|
|||
assertEquals(2, pcii.getPid().size());
|
||||
|
||||
assertTrue(
|
||||
pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent());
|
||||
assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent());
|
||||
pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x")));
|
||||
assertTrue(pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd")));
|
||||
|
||||
assertNotNull(pcii.getAlternateIdentifier());
|
||||
assertEquals(1, pcii.getAlternateIdentifier().size());
|
||||
|
@ -151,9 +166,7 @@ public class GraphCleaningFunctionsTest {
|
|||
pcii
|
||||
.getAlternateIdentifier()
|
||||
.stream()
|
||||
.filter(s -> s.getValue().equals("10.1009/qwerty"))
|
||||
.findFirst()
|
||||
.isPresent());
|
||||
.anyMatch(s -> s.getValue().equals("10.1009/qwerty")));
|
||||
|
||||
getAuthorPids(p_cleaned).forEach(pid -> {
|
||||
System.out
|
||||
|
@ -172,17 +185,17 @@ public class GraphCleaningFunctionsTest {
|
|||
return pub
|
||||
.getAuthor()
|
||||
.stream()
|
||||
.map(a -> a.getPid())
|
||||
.flatMap(p -> p.stream())
|
||||
.map(s -> s.getQualifier());
|
||||
.map(Author::getPid)
|
||||
.flatMap(Collection::stream)
|
||||
.map(StructuredProperty::getQualifier);
|
||||
}
|
||||
|
||||
private Stream<StructuredProperty> getAuthorPids(Result pub) {
|
||||
return pub
|
||||
.getAuthor()
|
||||
.stream()
|
||||
.map(a -> a.getPid())
|
||||
.flatMap(p -> p.stream());
|
||||
.map(Author::getPid)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
private List<String> vocs() throws IOException {
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e"}
|
||||
{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
||||
{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
||||
{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
||||
{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
||||
{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
||||
{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
||||
{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
||||
{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
||||
{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"}
|
|
@ -1231,4 +1231,14 @@ dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-学術雑誌論文(査
|
|||
dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-紀要論文(査読有り)
|
||||
dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-雑誌記事(査読有り)
|
||||
dnet:review_levels @=@ 0001 @=@ 原著論文(査読有り)
|
||||
dnet:review_levels @=@ 0001 @=@ 査読論文
|
||||
dnet:review_levels @=@ 0001 @=@ 査読論文
|
||||
dnet:relation_relClass @=@ Cites @=@ cites
|
||||
dnet:relation_relClass @=@ IsCitedBy @=@ isCitedBy
|
||||
dnet:relation_relClass @=@ HasPart @=@ hasPart
|
||||
dnet:relation_relClass @=@ IsPartOf @=@ isPartOf
|
||||
dnet:relation_relClass @=@ IsReviewedBy @=@ isReviewedBy
|
||||
dnet:relation_relClass @=@ Reviews @=@ reviews
|
||||
dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo
|
||||
dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy
|
||||
dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo
|
||||
dnet:relation_subRelType @=@ relationship @=@ publicationDataset
|
|
@ -1079,4 +1079,41 @@ dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/DATASET/IS_SUPPLEMENTED
|
|||
dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/AUTHOR/ORCID @=@ An Open Researcher and Contributor ID (ORCID) that can be associated to an author of your publications
|
||||
dnet:review_levels @=@ dnet:review_levels @=@ 0000 @=@ Unknown
|
||||
dnet:review_levels @=@ dnet:review_levels @=@ 0002 @=@ nonPeerReviewed
|
||||
dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed
|
||||
dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ Cites @=@ Cites
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCitedBy @=@ IsCitedBy
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasPart @=@ HasPart
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPartOf @=@ IsPartOf
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsReviewedBy @=@ IsReviewedBy
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ Reviews @=@ Reviews
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementTo @=@ IsSupplementTo
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementedBy @=@ IsSupplementedBy
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsRelatedTo @=@ IsRelatedTo
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ Compiles @=@ Compiles
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ Continues @=@ Continues
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ Documents @=@ Documents
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasAmongTopNSimilarDocuments @=@ HasAmongTopNSimilarDocuments
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasVersion @=@ HasVersion
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsAmongTopNSimilarDocuments @=@ IsAmongTopNSimilarDocuments
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCompiledBy @=@ IsCompiledBy
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsContinuedBy @=@ IsContinuedBy
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDerivedFrom @=@ IsDerivedFrom
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDocumentedBy @=@ IsDocumentedBy
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsNewVersionOf @=@ IsNewVersionOf
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsObsoletedBy @=@ IsObsoletedBy
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsOriginalFormOf @=@ IsOriginalFormOf
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPreviousVersionOf @=@ IsPreviousVersionOf
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSourceOf @=@ IsSourceOf
|
||||
dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsVariantFormOf @=@ IsVariantFormOf
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ affiliation @=@ affiliation
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ citation @=@ citation
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ dedup @=@ dedup
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ outcome @=@ outcome
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ part @=@ part
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ participation @=@ participation
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ provision @=@ provision
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ relationship @=@ relationship
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ review @=@ review
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement
|
||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version
|
2
pom.xml
2
pom.xml
|
@ -753,7 +753,7 @@
|
|||
<mockito-core.version>3.3.3</mockito-core.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
<vtd.version>[2.12,3.0)</vtd.version>
|
||||
<dhp-schemas.version>[2.7.17]</dhp-schemas.version>
|
||||
<dhp-schemas.version>[2.7.18]</dhp-schemas.version>
|
||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||
|
|
Loading…
Reference in New Issue