Added Spark implementation of dedup

This commit is contained in:
Sandro La Bruzzo 2018-10-11 15:19:20 +02:00
parent d1c73bcf90
commit 1bb5c26e6d
5 changed files with 10 additions and 5 deletions

View File

@ -1,5 +1,6 @@
package eu.dnetlib.pace.config;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@ -12,7 +13,7 @@ import eu.dnetlib.pace.model.CondDef;
import eu.dnetlib.pace.model.FieldDef;
import org.apache.commons.collections.CollectionUtils;
public class PaceConfig {
public class PaceConfig implements Serializable {
private List<FieldDef> model;
private List<CondDef> strictConditions;

View File

@ -1,5 +1,6 @@
package eu.dnetlib.pace.config;
import java.io.Serializable;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@ -9,7 +10,7 @@ import com.google.common.collect.Sets;
import com.google.gson.GsonBuilder;
import org.apache.commons.lang.StringUtils;
public class WfConfig {
public class WfConfig implements Serializable {
/**
* Entity type.

View File

@ -1,12 +1,13 @@
package eu.dnetlib.pace.model;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import com.google.gson.Gson;
import eu.dnetlib.pace.clustering.*;
public class ClusteringDef {
public class ClusteringDef implements Serializable {
private Clustering name;

View File

@ -1,12 +1,13 @@
package eu.dnetlib.pace.model;
import java.io.Serializable;
import java.util.List;
import com.google.gson.Gson;
import eu.dnetlib.pace.condition.*;
import eu.dnetlib.pace.config.Cond;
public class CondDef {
public class CondDef implements Serializable {
private Cond name;

View File

@ -1,5 +1,6 @@
package eu.dnetlib.pace.model;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
@ -14,7 +15,7 @@ import eu.dnetlib.pace.distance.algo.*;
/**
* The schema is composed of field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
*/
public class FieldDef {
public class FieldDef implements Serializable {
public final static String PATH_SEPARATOR = "/";