scholexplorer_incremental_index #14

Merged
sandro.labruzzo merged 12 commits from scholexplorer_incremental_index into master 2022-03-29 11:43:02 +02:00
9 changed files with 423 additions and 68 deletions
Showing only changes of commit dc514a7281 - Show all commits

15
pom.xml
View File

@ -138,6 +138,9 @@
<artifactId>maven-dependency-plugin</artifactId>
<version>3.0.0</version>
</plugin>
sandro.labruzzo marked this conversation as resolved
Review

Please clean up unnecessary changes.

Please clean up unnecessary changes.
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
@ -279,6 +282,12 @@
<dependencyManagement>
<dependencies>
<dependency>
<groupId>me.xuender</groupId>
<artifactId>unidecode</artifactId>
<version>0.0.7</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
@ -316,6 +325,7 @@
<version>2.4</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
@ -335,6 +345,11 @@
<dependencies>
<dependency>
<groupId>me.xuender</groupId>
<artifactId>unidecode</artifactId>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
sandro.labruzzo marked this conversation as resolved Outdated

The version of each dependency should be indicated under the dependencyManagement element.

The version of each dependency should be indicated under the dependencyManagement element.

View File

@ -5,8 +5,13 @@ import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.*;
public class Scholix implements Serializable, Comparable<Scholix> {
private String publicationDate;
private List<ScholixEntityId> publisher;
@ -78,7 +83,21 @@ public class Scholix implements Serializable, Comparable<Scholix> {
}
/**
 * Two {@code Scholix} instances are equal when {@link #compareTo(Scholix)}
 * reports {@code 0} for them.
 *
 * @param o the object to test against this instance
 * @return {@code true} for the same reference or for a {@code Scholix} that compares as 0
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o instanceof Scholix) {
        final Scholix candidate = (Scholix) o;
        return compareTo(candidate) == 0;
    }
    return false;
}
/**
 * Hash built from the normalized publication date, the publisher and link
 * provider lists, the relationship, source, target and the normalized identifier.
 *
 * <p>Both lists are sorted before being hashed, so the order in which their
 * elements were inserted does not influence the result; a {@code null} list
 * contributes {@code 0}.
 *
 * @return the combined hash value
 */
@Override
public int hashCode() {
    int publisherHash = 0;
    if (publisher != null) {
        publisherHash = publisher.stream().sorted().collect(Collectors.toList()).hashCode();
    }

    int linkProviderHash = 0;
    if (linkprovider != null) {
        linkProviderHash = linkprovider.stream().sorted().collect(Collectors.toList()).hashCode();
    }

    return Objects.hash(
        normalizeString(publicationDate),
        publisherHash,
        linkProviderHash,
        relationship,
        source,
        target,
        normalizeIdnetifier(identifier));
}
@Override
public int compareTo(Scholix other) {
@ -92,10 +111,24 @@ public class Scholix implements Serializable, Comparable<Scholix> {
if (publicationDateCompare != 0)
return publicationDateCompare;
final int linkPublisherComparator = compareList(publisher, other.getPublisher());
if (linkPublisherComparator!= 0)
return linkPublisherComparator;
final int linkProviderComparator = compareList(linkprovider, other.getLinkprovider());
if (linkProviderComparator!= 0)
return linkProviderComparator;
final int relsComparator = compareObjects(relationship, other.getRelationship());
if (relsComparator!= 0)
return relsComparator;
return 0;
final int sourceComparator = compareObjects(source, other.getSource());
if (sourceComparator!= 0)
return sourceComparator;
return compareObjects(target, other.getTarget());
}
}

View File

@ -1,9 +1,14 @@
package eu.dnetlib.dhp.schema.sx.scholix;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class ScholixCollectedFrom implements Serializable {
import java.io.Serializable;
import java.util.Objects;
import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.*;
public class ScholixCollectedFrom implements Serializable, Comparable<ScholixCollectedFrom> {
private ScholixEntityId provider;
private String provisionMode;
@ -42,4 +47,42 @@ public class ScholixCollectedFrom implements Serializable {
public void setCompletionStatus(String completionStatus) {
this.completionStatus = completionStatus;
}
/**
 * Equality is delegated to {@link #compareTo(ScholixCollectedFrom)}: two
 * instances are equal when they compare as {@code 0}.
 *
 * @param o the object to test against this instance
 * @return {@code true} for the same reference or for a {@code ScholixCollectedFrom} that compares as 0
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o instanceof ScholixCollectedFrom) {
        final ScholixCollectedFrom candidate = (ScholixCollectedFrom) o;
        return compareTo(candidate) == 0;
    }
    return false;
}
/**
 * Hash of the provider together with the normalized provision mode and
 * normalized completion status.
 *
 * @return the combined hash value
 */
@Override
public int hashCode() {
    final String modeKey = normalizeString(provisionMode);
    final String statusKey = normalizeString(completionStatus);
    return Objects.hash(provider, modeKey, statusKey);
}
/**
 * Orders by provider first, then by normalized provision mode, then by
 * normalized completion status.
 *
 * <p>A {@code null} argument yields {@code -1}. When only this instance lacks a
 * provider the result is {@code 1}; when only {@code other} lacks one the
 * result is {@code -1}.
 *
 * @param other the instance to compare with
 * @return a negative value, zero or a positive value as described above
 */
@Override
public int compareTo(ScholixCollectedFrom other) {
    if (other == null) {
        return -1;
    }

    final int modeOrder = StringUtils
        .compare(normalizeString(provisionMode), normalizeString(other.getProvisionMode()));
    final int statusOrder = StringUtils
        .compare(normalizeString(completionStatus), normalizeString(other.getCompletionStatus()));
    // Tie-breaker used whenever the providers do not decide the ordering.
    final int secondaryOrder = modeOrder != 0 ? modeOrder : statusOrder;

    final boolean otherHasProvider = other.getProvider() != null;
    if (provider == null) {
        return otherHasProvider ? 1 : secondaryOrder;
    }
    if (!otherHasProvider) {
        return -1;
    }

    final int providerOrder = provider.compareTo(other.getProvider());
    return providerOrder != 0 ? providerOrder : secondaryOrder;
}
}

View File

@ -1,37 +1,58 @@
package eu.dnetlib.dhp.schema.sx.scholix;
import org.apache.commons.lang3.StringUtils;
import java.text.Normalizer;
import java.util.List;
import java.util.stream.Stream;
import com.google.common.collect.Iterators;
import me.xuender.unidecode.Unidecode;
public class ScholixComparator {
/**
 * Normalizes an identifier for comparison and hashing: the input is converted
 * to Unicode NFD form and lower-cased.
 *
 * @param input the raw identifier, may be {@code null}
 * @return the normalized identifier, or {@code null} when {@code input} is {@code null}
 */
public static String normalizeIdnetifier(final String input) {
    if (input == null) {
        return null;
    }
    // Locale.ROOT keeps the result independent of the JVM default locale; with the
    // no-arg toLowerCase() a Turkish default locale maps "I" to a dotless "ı",
    // so the same identifier would normalize differently on different machines.
    return Normalizer.normalize(input, Normalizer.Form.NFD).toLowerCase(java.util.Locale.ROOT);
}
public static String normalizeString(final String input) {
if (input == null)
return null;
return Normalizer.normalize(input, Normalizer.Form.NFD)
.toLowerCase()
.replaceAll("[^a-zA-Z0-9]", "");
return Unidecode.decode(input).toLowerCase();
}
/**
 * Placeholder comparison for two lists of {@link ScholixEntityId}.
 *
 * <p>The current body ignores both arguments and always returns {@code 0}.
 *
 * @param first  not read by the current implementation
 * @param second not read by the current implementation
 * @return always {@code 0}
 */
public static int compareScholixEntityId(final List<ScholixEntityId> first, final List<ScholixEntityId> second) {
return 0;
}
public static int compareString(final String first, final String second) {
if (first == null && second == null)
public static <T extends Comparable<T>> int compareObjects (T left, T right) {
if (left == null && right==null)
return 0;
if (first==null )
if(left == null)
return 1;
if (second == null)
return -1;
return first.compareTo(second);
if (right == null)
return -1;
return left.compareTo(right);
}
/**
 * Compares two lists irrespective of the order of their elements.
 *
 * <p>Both lists are sorted by natural order and then compared element by
 * element; the first non-zero element comparison decides the result. If one
 * sorted sequence is a prefix of the other, the shorter list is considered
 * smaller. Unlike a plain "equal or -1" answer, the sign flips when the
 * arguments are swapped, as the {@link Comparable} contract requires of
 * callers that return this value from {@code compareTo}.
 *
 * <p>{@code null} handling: two {@code null} lists are equal, a {@code null}
 * left list yields {@code 1} and a {@code null} right list yields {@code -1}.
 *
 * @param left  the first list, may be {@code null}
 * @param right the second list, may be {@code null}
 * @param <T>   the element type
 * @return {@code 0} when both lists hold the same elements, otherwise a
 *         negative or positive value consistent under argument swap
 */
public static <T extends Comparable<T>> int compareList(List<T> left, List<T> right) {
    if (left == null && right == null) {
        return 0;
    }
    if (left == null) {
        return 1;
    }
    if (right == null) {
        return -1;
    }

    final java.util.Iterator<T> leftIt = left.stream().sorted().iterator();
    final java.util.Iterator<T> rightIt = right.stream().sorted().iterator();
    while (leftIt.hasNext() && rightIt.hasNext()) {
        final int order = leftIt.next().compareTo(rightIt.next());
        if (order != 0) {
            return order;
        }
    }
    // One side is exhausted: the list with remaining elements is the greater one.
    if (leftIt.hasNext()) {
        return 1;
    }
    if (rightIt.hasNext()) {
        return -1;
    }
    return 0;
}
}

View File

@ -7,6 +7,7 @@ import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.normalizeString;
@ -49,28 +50,20 @@ public class ScholixEntityId implements Serializable, Comparable<ScholixEntityId
@Override
public int hashCode() {
return Objects.hash(name, identifiers);
if (identifiers != null)
return Objects.hash(normalizeString(name), identifiers.stream().sorted().collect(Collectors.toList()));
else
return Objects.hash(normalizeString(name));
}
@Override
public int compareTo(ScholixEntityId other) {
if (other == null)
return -1;
final int nameComp = StringUtils.compare(normalizeString(name), normalizeString(other.getName()));
if (nameComp != 0)
return nameComp;
if (identifiers == null && other.getIdentifiers() == null)
return 0;
if (identifiers == null)
return 1;
if (other.getIdentifiers() == null)
return -1;
if (identifiers.size()!= other.getIdentifiers().size())
return -1;
return ScholixComparator.compareList(identifiers,other.getIdentifiers());
Stream<ScholixIdentifier> sortedLeft = identifiers.stream().sorted();
Stream<ScholixIdentifier> sortedRight = other.getIdentifiers().stream().sorted();
boolean equalsStream = Iterators.elementsEqual(sortedLeft.iterator(), sortedRight.iterator());
return equalsStream?0:-1;
}
}

View File

@ -1,22 +1,33 @@
package eu.dnetlib.dhp.schema.sx.scholix;
import com.google.common.collect.ComparisonChain;
import org.apache.commons.lang3.StringUtils;
import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.normalizeString;
import java.io.Serializable;
import java.util.Objects;
import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.normalizeIdnetifier;
/**
* The type Scholix identifier.
*/
public class ScholixIdentifier implements Serializable, Comparable<ScholixIdentifier> {
private String identifier;
private String schema;
private String url;
/**
 * Creates an empty identifier; no field is assigned by this constructor, so
 * {@code identifier}, {@code schema} and {@code url} must be supplied through
 * their setters.
 */
public ScholixIdentifier() {
}
/**
* Instantiates a new Scholix identifier.
*
* @param identifier the identifier
* @param schema the schema
* @param url the url
*/
public ScholixIdentifier(String identifier, String schema, String url) {
this.identifier = identifier;
this.schema = schema;
@ -24,40 +35,71 @@ public class ScholixIdentifier implements Serializable, Comparable<ScholixIdenti
}
/**
 * Returns the URL held by this identifier.
 *
 * @return the current value of the {@code url} field, may be {@code null}
 */
public String getUrl() {
return url;
}
/**
 * Replaces the URL held by this identifier.
 *
 * @param url the value to store in the {@code url} field; stored as given
 */
public void setUrl(String url) {
this.url = url;
}
/**
 * Returns the identifier value held by this object.
 *
 * @return the current value of the {@code identifier} field, may be {@code null}
 */
public String getIdentifier() {
return identifier;
}
/**
 * Replaces the identifier value held by this object.
 *
 * @param identifier the value to store in the {@code identifier} field; stored as given
 */
public void setIdentifier(String identifier) {
this.identifier = identifier;
}
/**
 * Returns the schema held by this identifier.
 *
 * @return the current value of the {@code schema} field, may be {@code null}
 */
public String getSchema() {
return schema;
}
/**
 * Replaces the schema held by this identifier.
 *
 * @param schema the value to store in the {@code schema} field; stored as given
 */
public void setSchema(String schema) {
this.schema = schema;
}
@Override
public int compareTo(ScholixIdentifier o) {
final int idComp = StringUtils.compare(normalizeString(identifier), normalizeString(o.getIdentifier()));
public int compareTo(ScholixIdentifier other) {
if (other == null)
return -1;
final int idComp = StringUtils.compare(normalizeIdnetifier(identifier), normalizeIdnetifier(other.getIdentifier()));
if (idComp !=0)
return idComp;
final int schemaComp = StringUtils.compare(normalizeString(schema), normalizeString(o.getSchema()));
final int schemaComp = StringUtils.compare(normalizeIdnetifier(schema), normalizeIdnetifier(other.getSchema()));
if (schemaComp !=0)
return schemaComp;
final int urlComp = StringUtils.compare(normalizeString(url), normalizeString(o.getUrl()));
return urlComp;
return StringUtils.compare(normalizeIdnetifier(url), normalizeIdnetifier(other.getUrl()));
}
@ -72,7 +114,7 @@ public class ScholixIdentifier implements Serializable, Comparable<ScholixIdenti
@Override
public int hashCode() {
return Objects.hash(normalizeString(identifier), normalizeString(schema), normalizeString(url));
return Objects.hash(normalizeIdnetifier(identifier), normalizeIdnetifier(schema), normalizeIdnetifier(url));
}

View File

@ -1,9 +1,13 @@
package eu.dnetlib.dhp.schema.sx.scholix;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class ScholixRelationship implements Serializable {
import java.io.Serializable;
import java.util.Objects;
import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.*;
public class ScholixRelationship implements Serializable, Comparable<ScholixRelationship> {
private String name;
private String schema;
private String inverse;
@ -40,4 +44,34 @@ public class ScholixRelationship implements Serializable {
public void setInverse(String inverse) {
this.inverse = inverse;
}
/**
 * Equality is delegated to {@link #compareTo(ScholixRelationship)}: two
 * relationships are equal when they compare as {@code 0}.
 *
 * @param o the object to test against this instance
 * @return {@code true} for the same reference or for a {@code ScholixRelationship} that compares as 0
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o instanceof ScholixRelationship) {
        final ScholixRelationship candidate = (ScholixRelationship) o;
        return this.compareTo(candidate) == 0;
    }
    return false;
}
/**
 * Hash of the normalized name, schema and inverse, read through their getters.
 *
 * @return the combined hash value
 */
@Override
public int hashCode() {
    final String nameKey = normalizeString(getName());
    final String schemaKey = normalizeString(getSchema());
    final String inverseKey = normalizeString(getInverse());
    return Objects.hash(nameKey, schemaKey, inverseKey);
}
/**
 * Orders relationships by normalized name, then normalized schema, then
 * normalized inverse. A {@code null} argument yields {@code -1}.
 *
 * @param other the relationship to compare with
 * @return the result of the first field comparison that is not zero, or zero
 */
@Override
public int compareTo(ScholixRelationship other) {
    if (other == null) {
        return -1;
    }

    int order = StringUtils.compare(normalizeString(name), normalizeString(other.getName()));
    if (order == 0) {
        order = StringUtils.compare(normalizeString(schema), normalizeString(other.getSchema()));
    }
    if (order == 0) {
        order = StringUtils.compare(normalizeString(inverse), normalizeString(other.getInverse()));
    }
    return order;
}
}

View File

@ -1,10 +1,16 @@
package eu.dnetlib.dhp.schema.sx.scholix;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
public class ScholixResource implements Serializable {
import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.*;
public class ScholixResource implements Serializable, Comparable<ScholixResource> {
private List<ScholixIdentifier> identifier;
private String dnetIdentifier;
@ -87,4 +93,71 @@ public class ScholixResource implements Serializable {
public void setCollectedFrom(List<ScholixCollectedFrom> collectedFrom) {
this.collectedFrom = collectedFrom;
}
/**
 * Equality is delegated to {@link #compareTo(ScholixResource)}: two resources
 * are equal when they compare as {@code 0}.
 *
 * @param o the object to test against this instance
 * @return {@code true} for the same reference or for a {@code ScholixResource} that compares as 0
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o instanceof ScholixResource) {
        final ScholixResource candidate = (ScholixResource) o;
        return compareTo(candidate) == 0;
    }
    return false;
}
/**
 * Hash built from every field of the resource.
 *
 * <p>The four list fields are sorted before being hashed so that element order
 * does not influence the result; a {@code null} list contributes {@code 0}.
 * String fields are normalized before hashing.
 *
 * @return the combined hash value
 */
@Override
public int hashCode() {
    int idHash = 0;
    if (identifier != null) {
        idHash = identifier.stream().sorted().collect(Collectors.toList()).hashCode();
    }

    int creatorHash = 0;
    if (creator != null) {
        creatorHash = creator.stream().sorted().collect(Collectors.toList()).hashCode();
    }

    int publisherHash = 0;
    if (publisher != null) {
        publisherHash = publisher.stream().sorted().collect(Collectors.toList()).hashCode();
    }

    int collectedFromHash = 0;
    if (collectedFrom != null) {
        collectedFromHash = collectedFrom.stream().sorted().collect(Collectors.toList()).hashCode();
    }

    return Objects.hash(
        idHash,
        normalizeIdnetifier(dnetIdentifier),
        normalizeString(objectType),
        normalizeString(objectSubType),
        normalizeString(title),
        creatorHash,
        normalizeString(publicationDate),
        publisherHash,
        collectedFromHash);
}
/**
 * Compares two resources field by field and returns the first non-zero result.
 *
 * <p>Fields are examined in this order: identifier list, dnet identifier (raw
 * string), object type, object sub type, title, creator list, publication
 * date, publisher list, collectedFrom list. A {@code null} argument yields
 * {@code -1}.
 *
 * @param other the resource to compare with
 * @return the result of the first field comparison that is not zero, or zero
 */
@Override
public int compareTo(ScholixResource other) {
    if (other == null) {
        return -1;
    }

    // Each step runs only while every previous field compared as equal.
    int order = compareList(identifier, other.getIdentifier());
    if (order == 0) {
        order = StringUtils.compare(dnetIdentifier, other.getDnetIdentifier());
    }
    if (order == 0) {
        order = StringUtils.compare(normalizeString(objectType), normalizeString(other.getObjectType()));
    }
    if (order == 0) {
        order = StringUtils.compare(normalizeString(objectSubType), normalizeString(other.getObjectSubType()));
    }
    if (order == 0) {
        order = StringUtils.compare(normalizeString(title), normalizeString(other.getTitle()));
    }
    if (order == 0) {
        order = compareList(creator, other.getCreator());
    }
    if (order == 0) {
        order = StringUtils
            .compare(normalizeString(publicationDate), normalizeString(other.getPublicationDate()));
    }
    if (order == 0) {
        order = compareList(publisher, other.getPublisher());
    }
    if (order == 0) {
        order = compareList(collectedFrom, other.getCollectedFrom());
    }
    return order;
}
}

View File

@ -1,37 +1,91 @@
package eu.dnetlib.dhp.schema.sx.scholix;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
public class ScholixCompareTest {
@Test
public void testNormalization() {
final String input = "Tĥïŝ ĩš â fůňķŷ Šťŕĭńġhttps://doi.org/< >10.11646/zootaxa.5099.1.3";
final String expected = "thisisafunkystringhttpsdoiorg1011646zootaxa509913";
final String normalized = ScholixComparator.normalizeString(input);
assertEquals(normalized, expected);
/**
 * Builds a DOI-style {@link ScholixIdentifier} whose three fields are either
 * all upper-cased or all lower-cased.
 *
 * @param toUpper {@code true} to upper-case identifier, schema and url; {@code false} to lower-case them
 * @param idCount number appended to the identifier and to the url
 * @return the populated identifier
 */
private ScholixIdentifier generateMockScholixId(boolean toUpper, int idCount) {
    final String rawId = String.format("10.11646/zootaxa.5099.1.%d", idCount);
    final String rawSchema = "DOI";
    final String rawUrl = String.format("http://dx.dOI.org/10.11646/Zootaxa.5099.1.%d", idCount);

    final ScholixIdentifier mock = new ScholixIdentifier();
    if (toUpper) {
        mock.setIdentifier(rawId.toUpperCase());
        mock.setSchema(rawSchema.toUpperCase());
        mock.setUrl(rawUrl.toUpperCase());
    } else {
        mock.setIdentifier(rawId.toLowerCase());
        mock.setSchema(rawSchema.toLowerCase());
        mock.setUrl(rawUrl.toLowerCase());
    }
    return mock;
}
/**
 * Builds a {@link ScholixEntityId} named "Datacite" (upper- or lower-cased)
 * carrying {@code numberOfIds} mock identifiers.
 *
 * @param toUpper     {@code true} to upper-case the name and identifiers; {@code false} to lower-case them
 * @param invertOrder {@code true} to add identifiers from the highest index down to 0, {@code false} for 0 upwards
 * @param numberOfIds how many identifiers to generate
 * @return the populated entity id
 */
private ScholixEntityId generateMockScholixEntityId(boolean toUpper, boolean invertOrder, int numberOfIds) {
    final String datasourceName = "Datacite";
    final List<ScholixIdentifier> ids = new ArrayList<>();

    for (int step = 0; step < numberOfIds; step++) {
        // Walk the same index range in either direction depending on invertOrder.
        final int index = invertOrder ? numberOfIds - 1 - step : step;
        ids.add(generateMockScholixId(toUpper, index));
    }

    final String name;
    if (toUpper) {
        name = datasourceName.toUpperCase();
    } else {
        name = datasourceName.toLowerCase();
    }
    return new ScholixEntityId(name, ids);
}
/**
 * Builds a {@link ScholixCollectedFrom} with a mock provider, completion status
 * "complete" and provision mode "collected", both upper- or lower-cased.
 *
 * @param toUpper     {@code true} to upper-case all strings; {@code false} to lower-case them
 * @param invertOrder forwarded to the provider generator to control identifier order
 * @param numberOfIds forwarded to the provider generator as the identifier count
 * @return the populated instance
 */
private ScholixCollectedFrom generateMockScholixCollectedFrom(boolean toUpper, boolean invertOrder, int numberOfIds) {
    final String completionStatus = "complete";
    final String provisionMode = "collected";

    final ScholixCollectedFrom mock = new ScholixCollectedFrom();
    mock.setProvider(generateMockScholixEntityId(toUpper, invertOrder, numberOfIds));
    if (toUpper) {
        mock.setCompletionStatus(completionStatus.toUpperCase());
        mock.setProvisionMode(provisionMode.toUpperCase());
    } else {
        mock.setCompletionStatus(completionStatus.toLowerCase());
        mock.setProvisionMode(provisionMode.toLowerCase());
    }
    return mock;
}
/**
 * Builds a {@link ScholixRelationship} with name "IsRelatedTo", inverse
 * "RelatedTo" and schema "datacite", all upper- or lower-cased.
 *
 * @param toUpper {@code true} to upper-case the three values; {@code false} to lower-case them
 * @return the populated relationship
 */
private ScholixRelationship generateMockScholixRelationships(boolean toUpper) {
    final String name = "IsRelatedTo";
    final String inverse = "RelatedTo";
    final String schema = "datacite";

    final ScholixRelationship mock = new ScholixRelationship();
    if (toUpper) {
        mock.setName(name.toUpperCase());
        mock.setInverse(inverse.toUpperCase());
        mock.setSchema(schema.toUpperCase());
    } else {
        mock.setName(name.toLowerCase());
        mock.setInverse(inverse.toLowerCase());
        mock.setSchema(schema.toLowerCase());
    }
    return mock;
}
@Test
public void testScholixIdentifierComparison() {
final String id = "10.11646/zootaxa.5099.1.3";
final String schema = "DOI";
final String url ="http://dx.dOI.org/10.11646/Zootaxa.5099.1.3";
final ScholixIdentifier left = new ScholixIdentifier();
left.setIdentifier(id.toUpperCase());
left.setSchema(schema.toUpperCase());
left.setUrl(url.toUpperCase());
final ScholixIdentifier left = generateMockScholixId(true, 1);
final ScholixIdentifier right = new ScholixIdentifier();
right.setIdentifier(id.toUpperCase());
right.setSchema(schema.toUpperCase());
right.setUrl(url.toLowerCase());
final ScholixIdentifier right = generateMockScholixId(false,1);
assertEquals(0,left.compareTo(right));
@ -45,4 +99,51 @@ public class ScholixCompareTest {
}
/**
 * Entity ids that differ only in letter case and in identifier order must be
 * equal and share the same hash code.
 */
@Test
public void testScholixEntityIDComparison() {
    // Upper-case with ascending identifiers vs. lower-case with descending identifiers.
    final ScholixEntityId upperAscending = generateMockScholixEntityId(true, false, 10);
    final ScholixEntityId lowerDescending = generateMockScholixEntityId(false, true, 10);

    assertEquals(upperAscending, lowerDescending);
    assertEquals(upperAscending.hashCode(), lowerDescending.hashCode());
}
/**
 * CollectedFrom instances that differ only in letter case and identifier order
 * must be equal with equal hash codes; clearing the completion status on one
 * side must break equality.
 */
@Test
public void testScholixCollectedFromComparison() {
    final ScholixCollectedFrom upperInverted = generateMockScholixCollectedFrom(true, true, 20);
    final ScholixCollectedFrom lowerOrdered = generateMockScholixCollectedFrom(false, false, 20);

    assertEquals(upperInverted, lowerOrdered);
    assertEquals(upperInverted.hashCode(), lowerOrdered.hashCode());

    // A missing completion status on one side must make the two differ.
    lowerOrdered.setCompletionStatus(null);
    assertNotEquals(upperInverted, lowerOrdered);
}
/**
 * Relationships that differ only in letter case must be equal and share the
 * same hash code.
 */
@Test
public void testCompareScholixRelation() {
    final ScholixRelationship upperCased = generateMockScholixRelationships(true);
    final ScholixRelationship lowerCased = generateMockScholixRelationships(false);

    assertEquals(upperCased, lowerCased);
    assertEquals(upperCased.hashCode(), lowerCased.hashCode());
}
}