forked from D-Net/dnet-hadoop
code formatting
This commit is contained in:
parent
5d7ac78c41
commit
7181807e64
|
@ -82,10 +82,11 @@ public class AuthorMerger {
|
||||||
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
|
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
|
||||||
.max(Comparator.comparing(Tuple2::_1));
|
.max(Comparator.comparing(Tuple2::_1));
|
||||||
|
|
||||||
if(simAuthor.isPresent()) {
|
if (simAuthor.isPresent()) {
|
||||||
double th = THRESHOLD;
|
double th = THRESHOLD;
|
||||||
//increase the threshold if the surname is too short
|
// increase the threshold if the surname is too short
|
||||||
if (simAuthor.get()._2().getSurname() != null && simAuthor.get()._2().getSurname().length()<=3)
|
if (simAuthor.get()._2().getSurname() != null
|
||||||
|
&& simAuthor.get()._2().getSurname().length() <= 3)
|
||||||
th = 0.99;
|
th = 0.99;
|
||||||
|
|
||||||
if (simAuthor.get()._1() > th) {
|
if (simAuthor.get()._1() > th) {
|
||||||
|
@ -100,9 +101,10 @@ public class AuthorMerger {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String pidToComparableString(StructuredProperty pid) {
|
public static String pidToComparableString(StructuredProperty pid) {
|
||||||
return (pid.getQualifier() != null ?
|
return (pid.getQualifier() != null
|
||||||
pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" : "")
|
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
||||||
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
: "")
|
||||||
|
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int countAuthorsPids(List<Author> authors) {
|
public static int countAuthorsPids(List<Author> authors) {
|
||||||
|
@ -123,14 +125,13 @@ public class AuthorMerger {
|
||||||
final Person pa = parse(a);
|
final Person pa = parse(a);
|
||||||
final Person pb = parse(b);
|
final Person pb = parse(b);
|
||||||
|
|
||||||
//if both are accurate (e.g. they have name and surname)
|
// if both are accurate (e.g. they have name and surname)
|
||||||
if (pa.isAccurate() & pb.isAccurate()) {
|
if (pa.isAccurate() & pb.isAccurate()) {
|
||||||
return
|
return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
|
||||||
new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString()))*0.5
|
+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
|
||||||
+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString()))*0.5;
|
|
||||||
} else {
|
} else {
|
||||||
return
|
return new JaroWinkler()
|
||||||
new JaroWinkler().score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
|
.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -96,7 +96,7 @@ public class EntityMergerTest implements Serializable {
|
||||||
public void publicationMergerTest2() throws InstantiationException, IllegalAccessException, IOException {
|
public void publicationMergerTest2() throws InstantiationException, IllegalAccessException, IOException {
|
||||||
|
|
||||||
Publication pub_merged = DedupRecordFactory
|
Publication pub_merged = DedupRecordFactory
|
||||||
.entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class);
|
.entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class);
|
||||||
|
|
||||||
assertEquals(pub_merged.getAuthor().size(), 27);
|
assertEquals(pub_merged.getAuthor().size(), 27);
|
||||||
// insert assertions here
|
// insert assertions here
|
||||||
|
|
Loading…
Reference in New Issue