Added Spark Implementation of dedup

This commit is contained in:
Sandro La Bruzzo 2018-10-11 15:19:20 +02:00
parent d1c73bcf90
commit 1bb5c26e6d
5 changed files with 10 additions and 5 deletions

View File

@ -1,5 +1,6 @@
package eu.dnetlib.pace.config; package eu.dnetlib.pace.config;
import java.io.Serializable;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -12,7 +13,7 @@ import eu.dnetlib.pace.model.CondDef;
import eu.dnetlib.pace.model.FieldDef; import eu.dnetlib.pace.model.FieldDef;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
public class PaceConfig { public class PaceConfig implements Serializable {
private List<FieldDef> model; private List<FieldDef> model;
private List<CondDef> strictConditions; private List<CondDef> strictConditions;

View File

@ -1,5 +1,6 @@
package eu.dnetlib.pace.config; package eu.dnetlib.pace.config;
import java.io.Serializable;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
@ -9,7 +10,7 @@ import com.google.common.collect.Sets;
import com.google.gson.GsonBuilder; import com.google.gson.GsonBuilder;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
public class WfConfig { public class WfConfig implements Serializable {
/** /**
* Entity type. * Entity type.

View File

@ -1,12 +1,13 @@
package eu.dnetlib.pace.model; package eu.dnetlib.pace.model;
import java.io.Serializable;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.pace.clustering.*; import eu.dnetlib.pace.clustering.*;
public class ClusteringDef { public class ClusteringDef implements Serializable {
private Clustering name; private Clustering name;

View File

@ -1,12 +1,13 @@
package eu.dnetlib.pace.model; package eu.dnetlib.pace.model;
import java.io.Serializable;
import java.util.List; import java.util.List;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.pace.condition.*; import eu.dnetlib.pace.condition.*;
import eu.dnetlib.pace.config.Cond; import eu.dnetlib.pace.config.Cond;
public class CondDef { public class CondDef implements Serializable {
private Cond name; private Cond name;

View File

@ -1,5 +1,6 @@
package eu.dnetlib.pace.model; package eu.dnetlib.pace.model;
import java.io.Serializable;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -14,7 +15,7 @@ import eu.dnetlib.pace.distance.algo.*;
/** /**
* The schema is composed by field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm. * The schema is composed by field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
*/ */
public class FieldDef { public class FieldDef implements Serializable {
public final static String PATH_SEPARATOR = "/"; public final static String PATH_SEPARATOR = "/";