forked from antonis.lempesis/dnet-hadoop
Added Spark implementation of dedup
This commit is contained in:
parent
d1c73bcf90
commit
1bb5c26e6d
|
@ -1,5 +1,6 @@
|
|||
package eu.dnetlib.pace.config;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -12,7 +13,7 @@ import eu.dnetlib.pace.model.CondDef;
|
|||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
|
||||
public class PaceConfig {
|
||||
public class PaceConfig implements Serializable {
|
||||
|
||||
private List<FieldDef> model;
|
||||
private List<CondDef> strictConditions;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package eu.dnetlib.pace.config;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
@ -9,7 +10,7 @@ import com.google.common.collect.Sets;
|
|||
import com.google.gson.GsonBuilder;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
public class WfConfig {
|
||||
public class WfConfig implements Serializable {
|
||||
|
||||
/**
|
||||
* Entity type.
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.pace.clustering.*;
|
||||
|
||||
public class ClusteringDef {
|
||||
public class ClusteringDef implements Serializable {
|
||||
|
||||
private Clustering name;
|
||||
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.pace.condition.*;
|
||||
import eu.dnetlib.pace.config.Cond;
|
||||
|
||||
public class CondDef {
|
||||
public class CondDef implements Serializable {
|
||||
|
||||
private Cond name;
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -14,7 +15,7 @@ import eu.dnetlib.pace.distance.algo.*;
|
|||
/**
|
||||
* The schema is composed of field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
|
||||
*/
|
||||
public class FieldDef {
|
||||
public class FieldDef implements Serializable {
|
||||
|
||||
public final static String PATH_SEPARATOR = "/";
|
||||
|
||||
|
|
Loading…
Reference in New Issue