diff --git a/pom.xml b/pom.xml index 0ebb86f..a615670 100644 --- a/pom.xml +++ b/pom.xml @@ -138,6 +138,9 @@ maven-dependency-plugin 3.0.0 + + + org.codehaus.mojo exec-maven-plugin @@ -279,6 +282,12 @@ + + me.xuender + unidecode + 0.0.7 + + org.slf4j jcl-over-slf4j @@ -316,6 +325,7 @@ 2.4 + com.fasterxml.jackson.core jackson-databind @@ -335,6 +345,11 @@ + + me.xuender + unidecode + + org.junit.jupiter junit-jupiter diff --git a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/Scholix.java b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/Scholix.java index fbf3c58..95338ea 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/Scholix.java +++ b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/Scholix.java @@ -5,8 +5,13 @@ import org.apache.commons.lang3.StringUtils; import java.io.Serializable; import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.*; public class Scholix implements Serializable, Comparable { + private String publicationDate; private List publisher; @@ -78,7 +83,21 @@ public class Scholix implements Serializable, Comparable { } + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Scholix)) return false; + Scholix scholix = (Scholix) o; + return compareTo(scholix) == 0; + } + @Override + public int hashCode() { + final int publisherHash = publisher == null ? 0: publisher.stream().sorted().collect(Collectors.toList()).hashCode(); + final int linkProviderHash = linkprovider == null ? 0: linkprovider.stream().sorted().collect(Collectors.toList()).hashCode(); + + return Objects.hash(normalizeString(publicationDate),publisherHash, linkProviderHash, relationship, source, target, normalizeIdnetifier(identifier)); + } @Override public int compareTo(Scholix other) { @@ -92,10 +111,24 @@ public class Scholix implements Serializable, Comparable { if (publicationDateCompare != 0) return publicationDateCompare; + final int linkPublisherComparator = compareList(publisher, other.getPublisher()); + if (linkPublisherComparator!= 0) + return linkPublisherComparator; + + final int linkProviderComparator = compareList(linkprovider, other.getLinkprovider()); + if (linkProviderComparator!= 0) + return linkProviderComparator; + final int relsComparator = compareObjects(relationship, other.getRelationship()); + if (relsComparator!= 0) + return relsComparator; - return 0; + final int sourceComparator = compareObjects(source, other.getSource()); + if (sourceComparator!= 0) + return sourceComparator; + + return compareObjects(target, other.getTarget()); } } diff --git a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixCollectedFrom.java b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixCollectedFrom.java index e8e4551..cd415de 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixCollectedFrom.java +++ b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixCollectedFrom.java @@ -1,9 +1,14 @@ package eu.dnetlib.dhp.schema.sx.scholix; -import java.io.Serializable; +import org.apache.commons.lang3.StringUtils; -public class ScholixCollectedFrom implements Serializable { +import java.io.Serializable; +import java.util.Objects; + +import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.*; + +public class ScholixCollectedFrom implements Serializable, Comparable { private ScholixEntityId provider; private String provisionMode; @@ -42,4 +47,42 @@ public class ScholixCollectedFrom implements Serializable { public void setCompletionStatus(String completionStatus) { this.completionStatus = completionStatus; } + + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof ScholixCollectedFrom)) return false; + ScholixCollectedFrom that = (ScholixCollectedFrom) o; + return compareTo(that)==0; + } + + @Override + public int hashCode() { + return Objects.hash(provider, normalizeString(provisionMode), normalizeString(completionStatus)); + } + + @Override + public int compareTo(ScholixCollectedFrom other) { + if (other == null) + return -1; + + int provModeCompare = StringUtils.compare(normalizeString(provisionMode),normalizeString(other.getProvisionMode()) ); + int compStatusCompare =StringUtils.compare(normalizeString(completionStatus),normalizeString(other.getCompletionStatus()) ); + + if (provider == null && other.getProvider() == null) + return provModeCompare == 0 ? compStatusCompare: provModeCompare; + + + if (provider == null) + return 1; + if (other.getProvider() == null) + return -1; + + int provCompare = provider.compareTo(other.getProvider()); + + if (provCompare == 0) + return provModeCompare == 0 ? compStatusCompare: provModeCompare; + return provCompare; + } } diff --git a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixComparator.java b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixComparator.java index 40c2ff2..55c7c0e 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixComparator.java +++ b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixComparator.java @@ -1,37 +1,58 @@ package eu.dnetlib.dhp.schema.sx.scholix; -import org.apache.commons.lang3.StringUtils; - import java.text.Normalizer; import java.util.List; +import java.util.stream.Stream; + +import com.google.common.collect.Iterators; +import me.xuender.unidecode.Unidecode; public class ScholixComparator { + public static String normalizeIdnetifier(final String input) { + if (input == null) + return null; + + return Normalizer.normalize(input, Normalizer.Form.NFD) + .toLowerCase(); + } + + public static String normalizeString(final String input) { if (input == null) return null; - - return Normalizer.normalize(input, Normalizer.Form.NFD) - .toLowerCase() - .replaceAll("[^a-zA-Z0-9]", ""); + return Unidecode.decode(input).toLowerCase(); } - public static int compareScholixEntityId(final List first, final List second) { - return 0; - } - - public static int compareString(final String first, final String second) { - - - if (first == null && second == null) + public static > int compareObjects (T left, T right) { + if (left == null && right==null) return 0; - if (first==null ) + if(left == null) return 1; - if (second == null) - return -1; - return first.compareTo(second); + if (right == null) + return -1; + return left.compareTo(right); } + public static > int compareList (List left, List right) { + + if (left == null && right==null) + return 0; + if(left == null) + return 1; + if (right == null) + return -1; + + Stream sortedLeft = left.stream().sorted(); + Stream sortedRight = right.stream().sorted(); + boolean equals = Iterators.elementsEqual(sortedLeft.iterator(), sortedRight.iterator()); + + return equals? 0: -1; + + + } + + } diff --git a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixEntityId.java b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixEntityId.java index 376eff8..266b946 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixEntityId.java +++ b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixEntityId.java @@ -7,6 +7,7 @@ import org.apache.commons.lang3.StringUtils; import java.io.Serializable; import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; import java.util.stream.Stream; import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.normalizeString; @@ -49,28 +50,20 @@ public class ScholixEntityId implements Serializable, Comparable sortedLeft = identifiers.stream().sorted(); - Stream sortedRight = other.getIdentifiers().stream().sorted(); - - boolean equalsStream = Iterators.elementsEqual(sortedLeft.iterator(), sortedRight.iterator()); - - return equalsStream?0:-1; } } diff --git a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixIdentifier.java b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixIdentifier.java index 18c3bbb..f8b0cf1 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixIdentifier.java +++ b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixIdentifier.java @@ -1,22 +1,33 @@ package eu.dnetlib.dhp.schema.sx.scholix; -import com.google.common.collect.ComparisonChain; import org.apache.commons.lang3.StringUtils; - - -import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.normalizeString; import java.io.Serializable; import java.util.Objects; +import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.normalizeIdnetifier; + +/** + * The type Scholix identifier. + */ public class ScholixIdentifier implements Serializable, Comparable { private String identifier; private String schema; private String url; + /** + * Instantiates a new Scholix identifier. + */ public ScholixIdentifier() { } + /** + * Instantiates a new Scholix identifier. + * + * @param identifier the identifier + * @param schema the schema + * @param url the url + */ public ScholixIdentifier(String identifier, String schema, String url) { this.identifier = identifier; this.schema = schema; @@ -24,40 +35,71 @@ public class ScholixIdentifier implements Serializable, Comparable { private String name; private String schema; private String inverse; @@ -40,4 +44,34 @@ public class ScholixRelationship implements Serializable { public void setInverse(String inverse) { this.inverse = inverse; } + + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof ScholixRelationship)) return false; + ScholixRelationship that = (ScholixRelationship) o; + return this.compareTo(that) ==0; + } + + @Override + public int hashCode() { + return Objects.hash(normalizeString(getName()), normalizeString(getSchema()), normalizeString(getInverse())); + } + + @Override + public int compareTo(ScholixRelationship other) { + if (other == null) + return -1; + + final int nameCompare = StringUtils.compare(normalizeString(name), normalizeString(other.getName())); + if (nameCompare!= 0 ) + return nameCompare; + + final int schemaCompare = StringUtils.compare(normalizeString(schema), normalizeString(other.getSchema())); + if (schemaCompare!= 0 ) + return schemaCompare; + + return StringUtils.compare(normalizeString(inverse), normalizeString(other.getInverse())); + } } diff --git a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java index 569b1f3..d7b61ad 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java +++ b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java @@ -1,10 +1,16 @@ package eu.dnetlib.dhp.schema.sx.scholix; +import org.apache.commons.lang3.StringUtils; + import java.io.Serializable; import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; -public class ScholixResource implements Serializable { +import static eu.dnetlib.dhp.schema.sx.scholix.ScholixComparator.*; + +public class ScholixResource implements Serializable, Comparable { private List identifier; private String dnetIdentifier; @@ -87,4 +93,71 @@ public class ScholixResource implements Serializable { public void setCollectedFrom(List collectedFrom) { this.collectedFrom = collectedFrom; } + + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof ScholixResource)) return false; + ScholixResource that = (ScholixResource) o; + return compareTo(that) == 0; + } + + @Override + public int hashCode() { + int idHash = identifier == null ? 0 : identifier.stream().sorted().collect(Collectors.toList()).hashCode(); + int creatorHash = creator == null ? 0 : creator.stream().sorted().collect(Collectors.toList()).hashCode(); + + int publisherHash = publisher == null ? 0 : publisher.stream().sorted().collect(Collectors.toList()).hashCode(); + int collectedFromHash = collectedFrom == null ? 0 : collectedFrom.stream().sorted().collect(Collectors.toList()).hashCode(); + + return Objects.hash(idHash, normalizeIdnetifier(dnetIdentifier), normalizeString(objectType), + normalizeString(objectSubType), normalizeString(title),creatorHash, normalizeString(publicationDate), publisherHash, collectedFromHash); + } + + @Override + public int compareTo(ScholixResource other) { + if (other == null) + return -1; + final int compIdentifiers = compareList(identifier, other.getIdentifier()); + if (compIdentifiers!= 0) + return compIdentifiers; + + final int dnetIdComp = StringUtils.compare(dnetIdentifier, other.getDnetIdentifier()); + + if (dnetIdComp != 0) + return dnetIdComp; + + final int objTypeComparator = StringUtils.compare(normalizeString(objectType), normalizeString(other.getObjectType())); + + if (objTypeComparator != 0) + return objTypeComparator; + + + final int objSubTypeComparator = StringUtils.compare(normalizeString(objectSubType), normalizeString(other.getObjectSubType())); + + if (objSubTypeComparator != 0) + return objSubTypeComparator; + + + final int titleComparator = StringUtils.compare(normalizeString(title), normalizeString(other.getTitle())); + + if (titleComparator != 0) + return titleComparator; + + final int creatorComparator = compareList(creator, other.getCreator()); + if (creatorComparator!= 0) + return creatorComparator; + + final int pubDateComparator = StringUtils.compare(normalizeString(publicationDate), normalizeString(other.getPublicationDate())); + if (pubDateComparator!= 0) + return pubDateComparator; + + final int publisherComparator = compareList(publisher, other.getPublisher()); + if (publisherComparator!= 0) + return publisherComparator; + + return compareList(collectedFrom, other.getCollectedFrom()); + + } } diff --git a/src/test/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixCompareTest.java b/src/test/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixCompareTest.java index 30b64a5..53a489d 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixCompareTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixCompareTest.java @@ -1,37 +1,91 @@ package eu.dnetlib.dhp.schema.sx.scholix; -import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Test; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + public class ScholixCompareTest { - @Test - public void testNormalization() { - final String input = "Tĥïŝ ĩš â fůňķŷ Šťŕĭńġhttps://doi.org/< >10.11646/zootaxa.5099.1.3"; - final String expected = "thisisafunkystringhttpsdoiorg1011646zootaxa509913"; - final String normalized = ScholixComparator.normalizeString(input); - assertEquals(normalized, expected); + + + + + + + private ScholixIdentifier generateMockScholixId(boolean toUpper, int idCount) { + + final String id = String.format("10.11646/zootaxa.5099.1.%d", idCount); + final String schema = "DOI"; + final String url =String.format("http://dx.dOI.org/10.11646/Zootaxa.5099.1.%d", idCount); + + final ScholixIdentifier result = new ScholixIdentifier(); + result.setIdentifier(toUpper ? id.toUpperCase(): id.toLowerCase()); + result.setSchema(toUpper ? schema.toUpperCase():schema.toLowerCase()); + result.setUrl(toUpper ? url.toUpperCase():url.toLowerCase()); + return result; + + } + + + private ScholixEntityId generateMockScholixEntityId(boolean toUpper, boolean invertOrder, int numberOfIds) { + + final String datasourceName = "Datacite"; + + final List ids = new ArrayList<>(); + + if (!invertOrder) { + for (int i = 0; i < numberOfIds; i++) { + ids.add(generateMockScholixId(toUpper, i)); + } + } + else { + for (int i = numberOfIds-1; i >=0; i--) { + ids.add(generateMockScholixId(toUpper, i)); + } + } + + return new ScholixEntityId(toUpper? datasourceName.toUpperCase(): datasourceName.toLowerCase(), ids); + } + + private ScholixCollectedFrom generateMockScholixCollectedFrom(boolean toUpper, boolean invertOrder, int numberOfIds) { + + final ScholixCollectedFrom result = new ScholixCollectedFrom(); + final String completionStatus = "complete"; + final String provisionMode = "collected"; + result.setProvider(generateMockScholixEntityId(toUpper, invertOrder, numberOfIds)); + result.setCompletionStatus(toUpper ? completionStatus.toUpperCase(): completionStatus.toLowerCase()); + result.setProvisionMode(toUpper ? provisionMode.toUpperCase(): provisionMode.toLowerCase()); + return result; + } + + private ScholixRelationship generateMockScholixRelationships(boolean toUpper) { + + final String name = "IsRelatedTo"; + final String inverse = "RelatedTo"; + final String schema = "datacite"; + + final ScholixRelationship rel = new ScholixRelationship(); + + rel.setName(toUpper? name.toUpperCase():name.toLowerCase()); + rel.setInverse(toUpper? inverse.toUpperCase():inverse.toLowerCase()); + rel.setSchema(toUpper? schema.toUpperCase():schema.toLowerCase()); + + return rel; } @Test public void testScholixIdentifierComparison() { - final String id = "10.11646/zootaxa.5099.1.3"; - final String schema = "DOI"; - final String url ="http://dx.dOI.org/10.11646/Zootaxa.5099.1.3"; - final ScholixIdentifier left = new ScholixIdentifier(); - left.setIdentifier(id.toUpperCase()); - left.setSchema(schema.toUpperCase()); - left.setUrl(url.toUpperCase()); + final ScholixIdentifier left = generateMockScholixId(true, 1); - final ScholixIdentifier right = new ScholixIdentifier(); - right.setIdentifier(id.toUpperCase()); - right.setSchema(schema.toUpperCase()); - right.setUrl(url.toLowerCase()); - + final ScholixIdentifier right = generateMockScholixId(false,1); assertEquals(0,left.compareTo(right)); @@ -45,4 +99,51 @@ public class ScholixCompareTest { } + + + @Test + public void testScholixEntityIDComparison() { + + final ScholixEntityId first =generateMockScholixEntityId(true,false,10); + final ScholixEntityId second =generateMockScholixEntityId(false,true,10); + assertEquals(first,second); + assertEquals(first.hashCode(), second.hashCode()); + + } + + + @Test + public void testScholixCollectedFromComparison() { + final ScholixCollectedFrom cfLeft = generateMockScholixCollectedFrom(true, true, 20); + + + + final ScholixCollectedFrom cfRight = generateMockScholixCollectedFrom(false, false, 20); + + assertEquals(cfLeft, cfRight); + + assertEquals(cfLeft.hashCode(), cfRight.hashCode()); + + cfRight.setCompletionStatus(null); + + assertNotEquals(cfLeft, cfRight); + + + } + + + @Test + public void testCompareScholixRelation() { + + + final ScholixRelationship left = generateMockScholixRelationships(true); + final ScholixRelationship right = generateMockScholixRelationships(false); + + assertEquals(left, right); + assertEquals(left.hashCode(), right.hashCode()); + + + + } + }