forked from D-Net/dnet-hadoop
Minor change to the author match, which can now compute either a count or a percentage of common authors
This commit is contained in:
parent
6a6c266dde
commit
297eb207a5
|
@ -26,6 +26,7 @@ public class AuthorsMatch extends AbstractComparator {
|
||||||
private double FULLNAME_THRESHOLD;
|
private double FULLNAME_THRESHOLD;
|
||||||
private String MODE; //full or surname
|
private String MODE; //full or surname
|
||||||
private int SIZE_THRESHOLD;
|
private int SIZE_THRESHOLD;
|
||||||
|
private String TYPE; //count or percentage
|
||||||
private int common;
|
private int common;
|
||||||
|
|
||||||
public AuthorsMatch(Map<String, String> params){
|
public AuthorsMatch(Map<String, String> params){
|
||||||
|
@ -37,6 +38,7 @@ public class AuthorsMatch extends AbstractComparator {
|
||||||
NAME_THRESHOLD = Double.parseDouble(params.getOrDefault("name_th", "0.95"));
|
NAME_THRESHOLD = Double.parseDouble(params.getOrDefault("name_th", "0.95"));
|
||||||
FULLNAME_THRESHOLD = Double.parseDouble(params.getOrDefault("fullname_th", "0.9"));
|
FULLNAME_THRESHOLD = Double.parseDouble(params.getOrDefault("fullname_th", "0.9"));
|
||||||
SIZE_THRESHOLD = Integer.parseInt(params.getOrDefault("size_th", "20"));
|
SIZE_THRESHOLD = Integer.parseInt(params.getOrDefault("size_th", "20"));
|
||||||
|
TYPE = params.getOrDefault("type", "percentage");
|
||||||
common = 0;
|
common = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,8 +125,13 @@ public class AuthorsMatch extends AbstractComparator {
|
||||||
//normalization factor to compute the score
|
//normalization factor to compute the score
|
||||||
int normFactor = aList.size() == bList.size() ? aList.size() : (aList.size() + bList.size() - common);
|
int normFactor = aList.size() == bList.size() ? aList.size() : (aList.size() + bList.size() - common);
|
||||||
|
|
||||||
|
if(TYPE.equals("percentage")) {
|
||||||
return (double) common / normFactor;
|
return (double) common / normFactor;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
return (double) common;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public boolean compareSurname(Person p1, Person p2) {
|
public boolean compareSurname(Person p1, Person p2) {
|
||||||
return ssalgo.score(normalization(p1.getNormalisedSurname()), normalization(p2.getNormalisedSurname())) > SURNAME_THRESHOLD;
|
return ssalgo.score(normalization(p1.getNormalisedSurname()), normalization(p2.getNormalisedSurname())) > SURNAME_THRESHOLD;
|
||||||
|
|
Loading…
Reference in New Issue