forked from D-Net/dnet-hadoop
Added Spark implementation of dedup
This commit is contained in:
parent
d1c73bcf90
commit
1bb5c26e6d
|
@ -1,5 +1,6 @@
|
||||||
package eu.dnetlib.pace.config;
|
package eu.dnetlib.pace.config;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -12,7 +13,7 @@ import eu.dnetlib.pace.model.CondDef;
|
||||||
import eu.dnetlib.pace.model.FieldDef;
|
import eu.dnetlib.pace.model.FieldDef;
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
|
|
||||||
public class PaceConfig {
|
public class PaceConfig implements Serializable {
|
||||||
|
|
||||||
private List<FieldDef> model;
|
private List<FieldDef> model;
|
||||||
private List<CondDef> strictConditions;
|
private List<CondDef> strictConditions;
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package eu.dnetlib.pace.config;
|
package eu.dnetlib.pace.config;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
@ -9,7 +10,7 @@ import com.google.common.collect.Sets;
|
||||||
import com.google.gson.GsonBuilder;
|
import com.google.gson.GsonBuilder;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
public class WfConfig {
|
public class WfConfig implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Entity type.
|
* Entity type.
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
package eu.dnetlib.pace.model;
|
package eu.dnetlib.pace.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
import eu.dnetlib.pace.clustering.*;
|
import eu.dnetlib.pace.clustering.*;
|
||||||
|
|
||||||
public class ClusteringDef {
|
public class ClusteringDef implements Serializable {
|
||||||
|
|
||||||
private Clustering name;
|
private Clustering name;
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
package eu.dnetlib.pace.model;
|
package eu.dnetlib.pace.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
import eu.dnetlib.pace.condition.*;
|
import eu.dnetlib.pace.condition.*;
|
||||||
import eu.dnetlib.pace.config.Cond;
|
import eu.dnetlib.pace.config.Cond;
|
||||||
|
|
||||||
public class CondDef {
|
public class CondDef implements Serializable {
|
||||||
|
|
||||||
private Cond name;
|
private Cond name;
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package eu.dnetlib.pace.model;
|
package eu.dnetlib.pace.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -14,7 +15,7 @@ import eu.dnetlib.pace.distance.algo.*;
|
||||||
/**
|
/**
|
||||||
* The schema is composed of field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
|
* The schema is composed of field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
|
||||||
*/
|
*/
|
||||||
public class FieldDef {
|
public class FieldDef implements Serializable {
|
||||||
|
|
||||||
public final static String PATH_SEPARATOR = "/";
|
public final static String PATH_SEPARATOR = "/";
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue