forked from D-Net/dnet-hadoop
upgraded maven version of commons-lang
This commit is contained in:
parent
5c8f6febee
commit
46727f5c76
|
@ -27,17 +27,14 @@
|
|||
<artifactId>gson</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-lang3</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-collections</groupId>
|
||||
<artifactId>commons-collections</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>stringtemplate</artifactId>
|
||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.pace.clustering;
|
|||
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
|
|
|
@ -6,7 +6,7 @@ import java.util.Map;
|
|||
import com.google.common.base.Predicate;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.pace.clustering;
|
|||
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
|
|
@ -8,7 +8,7 @@ import com.google.common.collect.Lists;
|
|||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
@ClusteringClass("lowercase")
|
||||
public class LowercaseClustering extends AbstractClusteringFunction {
|
||||
|
|
|
@ -2,7 +2,7 @@ package eu.dnetlib.pace.clustering;
|
|||
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
|||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.Person;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
|
|
@ -5,8 +5,8 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import org.apache.commons.lang.RandomStringUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.RandomStringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
|
|
|
@ -9,9 +9,8 @@ import eu.dnetlib.pace.clustering.NGramUtils;
|
|||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldList;
|
||||
import eu.dnetlib.pace.model.FieldListImpl;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
|
@ -25,7 +24,6 @@ import java.util.stream.Collectors;
|
|||
* Set of common functions for the framework
|
||||
*
|
||||
* @author claudio
|
||||
*
|
||||
*/
|
||||
public abstract class AbstractPaceFunctions {
|
||||
|
||||
|
@ -242,9 +240,8 @@ public abstract class AbstractPaceFunctions {
|
|||
|
||||
public double commonElementsPercentage(Set<String> s1, Set<String> s2) {
|
||||
|
||||
int longer = (s1.size()>s2.size())?s1.size():s2.size();
|
||||
|
||||
return (double)CollectionUtils.intersection(s1,s2).size()/(double)longer;
|
||||
double longer = Math.max(s1.size(), s2.size());
|
||||
return (double) s1.stream().filter(s2::contains).count() / longer;
|
||||
}
|
||||
|
||||
//convert the set of keywords to codes
|
||||
|
|
|
@ -4,7 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -103,7 +104,11 @@ public class FieldDef implements Serializable {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
try {
|
||||
return new ObjectMapper().writeValueAsString(this);
|
||||
} catch (JsonProcessingException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
|
||||
import java.util.Collection;
|
||||
|
@ -283,7 +283,12 @@ public class FieldListImpl extends AbstractField implements FieldList {
|
|||
case String:
|
||||
return Joiner.on(" ").join(stringList());
|
||||
case JSON:
|
||||
final String json = new Gson().toJson(stringList());
|
||||
String json;
|
||||
try {
|
||||
json = new ObjectMapper().writeValueAsString(this);
|
||||
} catch (JsonProcessingException e) {
|
||||
json = null;
|
||||
}
|
||||
return json;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown type: " + getType().toString());
|
||||
|
|
|
@ -2,12 +2,12 @@ package eu.dnetlib.pace.model;
|
|||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import org.apache.commons.collections.iterators.SingletonIterator;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* The Class FieldValueImpl.
|
||||
|
@ -124,7 +124,7 @@ public class FieldValueImpl extends AbstractField implements FieldValue {
|
|||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Iterator<Field> iterator() {
|
||||
return new SingletonIterator(this);
|
||||
return Collections.singleton((Field) this).iterator();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,129 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import com.google.common.collect.ComparisonChain;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Ordering;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class Author implements Comparable<Author> {
|
||||
|
||||
private String id;
|
||||
private String fullname;
|
||||
private String firstname;
|
||||
private String secondnames;
|
||||
|
||||
private List<Match> matches = Lists.newArrayList();
|
||||
private Set<Author> coauthors = Sets.newHashSet();
|
||||
private SubjectsMap subjectsMap = new SubjectsMap();
|
||||
|
||||
public Author() {
|
||||
super();
|
||||
}
|
||||
|
||||
public Author(final Author a) {
|
||||
this.id = a.getId();
|
||||
this.fullname = a.getFullname();
|
||||
this.firstname = a.getFirstname();
|
||||
this.secondnames = a.getSecondnames();
|
||||
|
||||
this.matches = a.getMatches();
|
||||
this.coauthors = a.getCoauthors();
|
||||
this.subjectsMap = a.getSubjectsMap();
|
||||
}
|
||||
|
||||
public boolean hasMatches() {
|
||||
return (getMatches() != null) && !getMatches().isEmpty();
|
||||
}
|
||||
|
||||
public boolean hasCoauthors() {
|
||||
return (getCoauthors() != null) && !getCoauthors().isEmpty();
|
||||
}
|
||||
|
||||
public boolean isWellFormed() {
|
||||
return StringUtils.isNotBlank(getSecondnames()) && StringUtils.isNotBlank(getFirstname());
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getFullname() {
|
||||
return fullname;
|
||||
}
|
||||
|
||||
public void setFullname(final String fullname) {
|
||||
this.fullname = fullname;
|
||||
}
|
||||
|
||||
public String getFirstname() {
|
||||
return firstname;
|
||||
}
|
||||
|
||||
public void setFirstname(final String firstname) {
|
||||
this.firstname = firstname;
|
||||
}
|
||||
|
||||
public String getSecondnames() {
|
||||
return secondnames;
|
||||
}
|
||||
|
||||
public void setSecondnames(final String secondnames) {
|
||||
this.secondnames = secondnames;
|
||||
}
|
||||
|
||||
public List<Match> getMatches() {
|
||||
return matches;
|
||||
}
|
||||
|
||||
public void setMatches(final List<Match> matches) {
|
||||
this.matches = matches;
|
||||
}
|
||||
|
||||
public Set<Author> getCoauthors() {
|
||||
return coauthors;
|
||||
}
|
||||
|
||||
public void setCoauthors(final Set<Author> coauthors) {
|
||||
this.coauthors = coauthors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return getId().hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final Author o) {
|
||||
return ComparisonChain.start()
|
||||
.compare(this.getId(), o.getId(), Ordering.natural().nullsLast())
|
||||
.result();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
return (o instanceof Author) && getId().equals(((Author) o).getId());
|
||||
}
|
||||
|
||||
public SubjectsMap getSubjectsMap() {
|
||||
return subjectsMap;
|
||||
}
|
||||
|
||||
public void setSubjectsMap(final SubjectsMap subjectsMap) {
|
||||
this.subjectsMap = subjectsMap;
|
||||
}
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class AuthorSet {
|
||||
|
||||
private String id;
|
||||
private Authors authors;
|
||||
|
||||
public AuthorSet(final String id, final Authors authors) {
|
||||
super();
|
||||
this.id = id;
|
||||
this.authors = authors;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public Authors getAuthors() {
|
||||
return authors;
|
||||
}
|
||||
|
||||
public void setAuthors(final Authors authors) {
|
||||
this.authors = authors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
|
||||
import com.google.common.collect.ComparisonChain;
|
||||
import com.google.common.collect.Ordering;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class Authors extends HashSet<Author> implements Comparable<Authors> {
|
||||
|
||||
private static final long serialVersionUID = -6878376220805286142L;
|
||||
|
||||
public Authors() {
|
||||
super();
|
||||
}
|
||||
|
||||
public Authors(final Collection<Author> authors) {
|
||||
super(authors);
|
||||
}
|
||||
|
||||
public Authors(final Author author) {
|
||||
super(Sets.newHashSet(author));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final Authors a) {
|
||||
return ComparisonChain.start()
|
||||
.compare(this.size(), a.size(), Ordering.natural().nullsLast())
|
||||
.result();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
final boolean res = o instanceof Authors;
|
||||
return res && (Sets.intersection(this, (Authors) o).size() == this.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int res = 0;
|
||||
for (final Author a : this) {
|
||||
res += a.hashCode();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
public class CoAuthor extends Author {
|
||||
|
||||
private static final Log log = LogFactory.getLog(CoAuthor.class);
|
||||
private String anchorId = null;
|
||||
|
||||
public CoAuthor() {
|
||||
super();
|
||||
}
|
||||
|
||||
public CoAuthor(final Author author) {
|
||||
super(author);
|
||||
}
|
||||
|
||||
public boolean hasAnchorId() {
|
||||
return StringUtils.isNotBlank(getAnchorId());
|
||||
}
|
||||
|
||||
public String getAnchorId() {
|
||||
return anchorId;
|
||||
}
|
||||
|
||||
public void setAnchorId(final String anchorId) {
|
||||
this.anchorId = anchorId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return getId() != null ? getId().hashCode() : getFullname().hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
return (o instanceof CoAuthor) && StringUtils.isNotBlank(getId()) ?
|
||||
getId().equals(((CoAuthor) o).getId()) :
|
||||
getFullname().equals(((CoAuthor) o).getFullname());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class CoAuthorSet {
|
||||
|
||||
private Author author;
|
||||
private Authors coAuthors;
|
||||
|
||||
public CoAuthorSet(final Author author, final Authors coAuthors) {
|
||||
super();
|
||||
this.author = author;
|
||||
this.coAuthors = coAuthors;
|
||||
}
|
||||
|
||||
public Author getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
||||
public void setAuthor(final Author author) {
|
||||
this.author = author;
|
||||
}
|
||||
|
||||
public Authors getCoAuthors() {
|
||||
return coAuthors;
|
||||
}
|
||||
|
||||
public void setCoAuthors(final Authors coAuthors) {
|
||||
this.coAuthors = coAuthors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
}
|
|
@ -1,40 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class CoAuthorSetLite {
|
||||
|
||||
private String id;
|
||||
|
||||
private Set<String> coAuthors;
|
||||
|
||||
public CoAuthorSetLite(final String id, final Set<String> coAuthors) {
|
||||
super();
|
||||
this.id = id;
|
||||
this.coAuthors = coAuthors;
|
||||
}
|
||||
|
||||
public Set<String> getCoAuthors() {
|
||||
return coAuthors;
|
||||
}
|
||||
|
||||
public void setCoAuthors(final Set<String> coAuthors) {
|
||||
this.coAuthors = coAuthors;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,78 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.collect.ComparisonChain;
|
||||
import com.google.common.collect.Ordering;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class CoAuthors extends HashSet<CoAuthor> implements Comparable<CoAuthors> {
|
||||
|
||||
private static final long serialVersionUID = 2525591524516562892L;
|
||||
|
||||
private Function<CoAuthors, Integer> hashFunction;
|
||||
|
||||
private static Function<CoAuthors, Integer> defaultHashFunction = new Function<CoAuthors, Integer>() {
|
||||
|
||||
@Override
|
||||
public Integer apply(final CoAuthors input) {
|
||||
int res = 0;
|
||||
for (final CoAuthor a : input) {
|
||||
res += a.hashCode();
|
||||
}
|
||||
return res;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
public CoAuthors() {
|
||||
super();
|
||||
}
|
||||
|
||||
public CoAuthors(final Collection<CoAuthor> coauthors) {
|
||||
super(coauthors);
|
||||
}
|
||||
|
||||
public CoAuthors(final CoAuthor coauthor) {
|
||||
super(Sets.newHashSet(coauthor));
|
||||
}
|
||||
|
||||
public Function<CoAuthors, Integer> getHashFunction() {
|
||||
return hashFunction;
|
||||
}
|
||||
|
||||
public void setHashFunction(final Function<CoAuthors, Integer> hashFunction) {
|
||||
this.hashFunction = hashFunction;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final CoAuthors a) {
|
||||
return ComparisonChain.start()
|
||||
.compare(this.size(), a.size(), Ordering.natural().nullsLast())
|
||||
.result();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
final boolean res = o instanceof CoAuthors;
|
||||
return res && (Sets.intersection(this, (CoAuthors) o).size() == this.size());
|
||||
}
|
||||
|
||||
public String hashCodeString() {
|
||||
return String.valueOf(hashCode());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return (getHashFunction() != null) ? getHashFunction().apply(this) : defaultHashFunction.apply(this);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,196 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.collect.ComparisonChain;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Ordering;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.GsonBuilder;
|
||||
|
||||
public class GTAuthor implements Comparable<GTAuthor> {
|
||||
|
||||
private String id;
|
||||
private Author author;
|
||||
private Authors merged;
|
||||
private CoAuthors coAuthors;
|
||||
private boolean anchor;
|
||||
|
||||
public GTAuthor() {}
|
||||
|
||||
public GTAuthor(final String id, final Authors merged, final CoAuthors coAuthors, final boolean anchor) {
|
||||
super();
|
||||
|
||||
if ((merged == null) || merged.isEmpty())
|
||||
throw new IllegalArgumentException("empty merged author set, id: " + id);
|
||||
|
||||
this.author = pickAuthor(merged);
|
||||
this.id = id;
|
||||
this.merged = merged;
|
||||
this.coAuthors = coAuthors;
|
||||
this.anchor = anchor;
|
||||
}
|
||||
|
||||
class AuthorFrequency extends Author {
|
||||
|
||||
private Integer frequency = new Integer(1);
|
||||
|
||||
public AuthorFrequency(final Author a) {
|
||||
super(a);
|
||||
}
|
||||
|
||||
public void increment() {
|
||||
setFrequency(getFrequency() + 1);
|
||||
}
|
||||
|
||||
public Integer getFrequency() {
|
||||
return frequency;
|
||||
}
|
||||
|
||||
public void setFrequency(final Integer frequency) {
|
||||
this.frequency = frequency;
|
||||
}
|
||||
}
|
||||
|
||||
private Author pickAuthor(final Authors merged) {
|
||||
final List<AuthorFrequency> freq = getFrequencies(merged);
|
||||
Collections.sort(freq, Collections.reverseOrder(new Comparator<AuthorFrequency>() {
|
||||
|
||||
@Override
|
||||
public int compare(final AuthorFrequency o1, final AuthorFrequency o2) {
|
||||
return ComparisonChain.start().compare(o1.getFullname().length(), o2.getFullname().length()).compare(o1.getFrequency(), o2.getFrequency())
|
||||
.result();
|
||||
}
|
||||
}));
|
||||
|
||||
return Iterables.getFirst(freq, null);
|
||||
}
|
||||
|
||||
private List<AuthorFrequency> getFrequencies(final Authors merged) {
|
||||
final Map<String, Integer> countMap = Maps.newHashMap();
|
||||
for (final Author a : merged) {
|
||||
final Integer count = countMap.get(a.getFullname());
|
||||
if (count == null) {
|
||||
countMap.put(a.getFullname(), new Integer(1));
|
||||
} else {
|
||||
countMap.put(a.getFullname(), count + 1);
|
||||
}
|
||||
}
|
||||
|
||||
return Lists.newArrayList(Iterables.transform(merged, new Function<Author, AuthorFrequency>() {
|
||||
|
||||
@Override
|
||||
public AuthorFrequency apply(final Author a) {
|
||||
final AuthorFrequency af = new AuthorFrequency(a);
|
||||
final Integer freq = countMap.get(af.getFullname());
|
||||
af.setFrequency(freq);
|
||||
return af;
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public Author getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
||||
public void setAuthor(final Author author) {
|
||||
this.author = author;
|
||||
}
|
||||
|
||||
public boolean hasMerged() {
|
||||
return (getMerged() != null) && !getMerged().isEmpty();
|
||||
}
|
||||
|
||||
public Authors getMerged() {
|
||||
return merged;
|
||||
}
|
||||
|
||||
public void setMerged(final Authors merged) {
|
||||
this.merged = merged;
|
||||
}
|
||||
|
||||
public boolean hasCoAuthors() {
|
||||
return (getCoAuthors() != null) && !getCoAuthors().isEmpty();
|
||||
}
|
||||
|
||||
public CoAuthors getCoAuthors() {
|
||||
return coAuthors;
|
||||
}
|
||||
|
||||
public void setCoAuthors(final CoAuthors coAuthors) {
|
||||
this.coAuthors = coAuthors;
|
||||
}
|
||||
|
||||
public boolean isAnchor() {
|
||||
return anchor;
|
||||
}
|
||||
|
||||
public void setAnchor(final boolean anchor) {
|
||||
this.anchor = anchor;
|
||||
}
|
||||
|
||||
public static GTAuthor fromJson(final String json) {
|
||||
final Gson gson = new Gson();
|
||||
return gson.fromJson(json, GTAuthor.class);
|
||||
}
|
||||
|
||||
public static List<GTAuthor> fromOafJson(final List<String> json) {
|
||||
|
||||
final GsonBuilder gb = new GsonBuilder();
|
||||
gb.registerTypeAdapter(GTAuthor.class, new GTAuthorOafSerialiser());
|
||||
final Gson gson = gb.create();
|
||||
|
||||
return Lists.newArrayList(Iterables.transform(json, new Function<String, GTAuthor>() {
|
||||
@Override
|
||||
public GTAuthor apply(final String s) {
|
||||
return gson.fromJson(s, GTAuthor.class);
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
public static GTAuthor fromOafJson(final String json) {
|
||||
|
||||
final GsonBuilder gb = new GsonBuilder();
|
||||
gb.registerTypeAdapter(GTAuthor.class, new GTAuthorOafSerialiser());
|
||||
final Gson gson = gb.create();
|
||||
|
||||
return gson.fromJson(json, GTAuthor.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return getId().hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final GTAuthor o) {
|
||||
return ComparisonChain.start()
|
||||
.compare(this.getId(), o.getId(), Ordering.natural().nullsLast())
|
||||
.result();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
return (o instanceof GTAuthor) && getId().equals(((GTAuthor) o).getId());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,104 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.lang.reflect.Type;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gson.JsonDeserializationContext;
|
||||
import com.google.gson.JsonDeserializer;
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParseException;
|
||||
|
||||
public class GTAuthorOafSerialiser implements JsonDeserializer<GTAuthor> {
|
||||
|
||||
private static final String VALUE = "value";
|
||||
private static final String SECONDNAMES = "secondnames";
|
||||
private static final String FIRSTNAME = "firstname";
|
||||
private static final String FULLNAME = "fullname";
|
||||
private static final String ID = "id";
|
||||
private static final String MERGEDPERSON = "mergedperson";
|
||||
private static final String METADATA = "metadata";
|
||||
private static final String ANCHOR_ID = "anchorId";
|
||||
private static final String COAUTHOR = "coauthor";
|
||||
|
||||
@Override
|
||||
public GTAuthor deserialize(final JsonElement json, final Type typeOfT, final JsonDeserializationContext context) throws JsonParseException {
|
||||
final GTAuthor gta = new GTAuthor();
|
||||
|
||||
gta.setAuthor(getAuthor(json));
|
||||
gta.setMerged(getMerged(json));
|
||||
|
||||
gta.setCoAuthors(getCoAuthors(json));
|
||||
|
||||
return gta;
|
||||
}
|
||||
|
||||
private CoAuthors getCoAuthors(final JsonElement json) {
|
||||
final JsonObject obj = json.getAsJsonObject();
|
||||
if (!obj.has(COAUTHOR)) return null;
|
||||
return new CoAuthors(Lists.newArrayList(Iterables.transform(obj.get(COAUTHOR).getAsJsonArray(),
|
||||
new Function<JsonElement, CoAuthor>() {
|
||||
|
||||
@Override
|
||||
public CoAuthor apply(final JsonElement in) {
|
||||
final CoAuthor a = new CoAuthor(getAuthor(in));
|
||||
final JsonObject jsonObject = in.getAsJsonObject();
|
||||
if (jsonObject.has(ANCHOR_ID)) {
|
||||
a.setAnchorId(jsonObject.get(ANCHOR_ID).getAsString());
|
||||
}
|
||||
return a;
|
||||
}
|
||||
})));
|
||||
}
|
||||
|
||||
private Author getAuthor(final JsonElement json) {
|
||||
|
||||
final Author a = new Author();
|
||||
a.setCoauthors(null);
|
||||
a.setMatches(null);
|
||||
|
||||
final JsonObject jso = json.getAsJsonObject();
|
||||
|
||||
a.setId(jso.has(ID) ? jso.get(ID).getAsString() : null);
|
||||
|
||||
final JsonObject jsonObject = json.getAsJsonObject();
|
||||
if (jsonObject.has(METADATA)) {
|
||||
final JsonObject m = jsonObject.get(METADATA).getAsJsonObject();
|
||||
a.setFullname(getValue(m, FULLNAME));
|
||||
a.setFirstname(getValue(m, FIRSTNAME));
|
||||
a.setSecondnames(getValues(m, SECONDNAMES));
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
private Authors getMerged(final JsonElement json) {
|
||||
final JsonObject obj = json.getAsJsonObject();
|
||||
if (!obj.has(MERGEDPERSON)) return null;
|
||||
return new Authors(Lists.newArrayList(Iterables.transform(obj.get(MERGEDPERSON).getAsJsonArray(),
|
||||
new Function<JsonElement, Author>() {
|
||||
|
||||
@Override
|
||||
public Author apply(final JsonElement in) {
|
||||
return getAuthor(in);
|
||||
}
|
||||
})));
|
||||
}
|
||||
|
||||
private String getValues(final JsonObject m, final String fieldName) {
|
||||
return m.has(fieldName) ? Joiner.on(" ").join(Iterables.transform(m.get(fieldName).getAsJsonArray(), new Function<JsonElement, String>() {
|
||||
|
||||
@Override
|
||||
public String apply(final JsonElement in) {
|
||||
return in.getAsJsonObject().get(VALUE).getAsString();
|
||||
}
|
||||
})) : null;
|
||||
}
|
||||
|
||||
private String getValue(final JsonObject m, final String fieldName) {
|
||||
return m.has(fieldName) ? m.get(fieldName).getAsJsonObject().get(VALUE).getAsString() : null;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class Group {
|
||||
|
||||
private String id;
|
||||
private int size;
|
||||
private List<Result> results;
|
||||
|
||||
public Group() {}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public int getSize() {
|
||||
return size;
|
||||
}
|
||||
|
||||
public void setSize(final int size) {
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
public List<Result> getResults() {
|
||||
return results;
|
||||
}
|
||||
|
||||
public void setResults(final List<Result> results) {
|
||||
this.results = results;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class InvertedAuthor {
|
||||
|
||||
private Author author;
|
||||
private Collection<String> ids;
|
||||
|
||||
public InvertedAuthor() {}
|
||||
|
||||
public InvertedAuthor(final Author author, final Collection<String> ids) {
|
||||
super();
|
||||
this.author = author;
|
||||
this.ids = ids;
|
||||
}
|
||||
|
||||
public Author getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
||||
public void setAuthor(final Author author) {
|
||||
this.author = author;
|
||||
}
|
||||
|
||||
public Collection<String> getIds() {
|
||||
return ids;
|
||||
}
|
||||
|
||||
public void setIds(final Collection<String> ids) {
|
||||
this.ids = ids;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
public class Match extends Author {
|
||||
|
||||
private double score;
|
||||
|
||||
public Match() {
|
||||
super();
|
||||
}
|
||||
|
||||
public static Match from(final Author a) {
|
||||
final Match m = new Match();
|
||||
if (a.isWellFormed()) {
|
||||
m.setFirstname(a.getFirstname());
|
||||
m.setSecondnames(a.getSecondnames());
|
||||
}
|
||||
m.setFullname(a.getFullname());
|
||||
m.setId(a.getId());
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
public double getScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
public void setScore(final double score) {
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,72 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.google.common.collect.ComparisonChain;
|
||||
import com.google.common.collect.Ordering;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class Result implements Comparable<Result> {
|
||||
|
||||
private String id;
|
||||
private String originalId;
|
||||
private String title;
|
||||
private List<Author> authors;
|
||||
|
||||
private double meanDistance;
|
||||
|
||||
public Result() {}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(final String originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(final String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public List<Author> getAuthors() {
|
||||
return authors;
|
||||
}
|
||||
|
||||
public void setAuthors(final List<Author> authors) {
|
||||
this.authors = authors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new Gson().toJson(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final Result o) {
|
||||
return ComparisonChain.start()
|
||||
.compare(this.getAuthors().size(), o.getAuthors().size(), Ordering.natural().nullsLast())
|
||||
.result();
|
||||
}
|
||||
|
||||
public double getMeanDistance() {
|
||||
return meanDistance;
|
||||
}
|
||||
|
||||
public void setMeanDistance(final double meanDistance) {
|
||||
this.meanDistance = meanDistance;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
 * A {@link HashMap} from {@code String} keys to {@code Integer} values.
 * It declares no members of its own and only gives this map shape a name.
 *
 * Created by claudio on 07/03/16.
 */
public class Subjects extends HashMap<String, Integer> {
}
|
|
@ -1,35 +0,0 @@
|
|||
package eu.dnetlib.pace.model.gt;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
/**
|
||||
* Created by claudio on 07/03/16.
|
||||
*/
|
||||
public class SubjectsMap extends HashMap<String, Subjects> {
|
||||
|
||||
public SubjectsMap mergeFrom(SubjectsMap sm) {
|
||||
|
||||
for(Entry<String, Subjects> e : sm.entrySet()) {
|
||||
if (!this.containsKey(e.getKey())) {
|
||||
Subjects sub = new Subjects();
|
||||
|
||||
sub.putAll(e.getValue());
|
||||
|
||||
this.put(e.getKey(), sub);
|
||||
} else {
|
||||
for (Entry<String, Integer> es : e.getValue().entrySet()) {
|
||||
final Subjects subjects = this.get(e.getKey());
|
||||
if (subjects.containsKey(es.getKey())) {
|
||||
subjects.put(es.getKey(), es.getValue() + subjects.get(es.getKey()));
|
||||
} else {
|
||||
subjects.put(es.getKey(), new Integer(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
|
@ -5,7 +5,7 @@ import eu.dnetlib.pace.tree.support.AbstractComparator;
|
|||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.wcohen.ss.AbstractStringDistance;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.tree.support.AbstractComparator;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.pace.tree;
|
|||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
|
|
@ -4,7 +4,7 @@ import eu.dnetlib.pace.config.Config;
|
|||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.tree.support.AbstractComparator;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.model.*;
|
||||
import eu.dnetlib.pace.model.gt.Match;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The comparison between two documents is given by the weighted mean of the field distances
|
||||
|
|
|
@ -8,7 +8,7 @@ import eu.dnetlib.pace.tree.support.TreeProcessor;
|
|||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.model.MapDocumentComparator;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
package eu.dnetlib.pace.util;
|
||||
|
||||
import org.apache.commons.lang.WordUtils;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import org.apache.commons.lang3.text.WordUtils;
|
||||
|
||||
public class Capitalise implements Function<String, String> {
|
||||
|
||||
|
|
Loading…
Reference in New Issue