forked from D-Net/dnet-hadoop
Added First Implementation of Spark Test
This commit is contained in:
parent
476c3d7b07
commit
d1c73bcf90
|
@@ -23,7 +23,7 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
<artifactId>guava</artifactId>
|
<artifactId>guava</artifactId>
|
||||||
<version>${google.guava.version}</version>
|
<version>15.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.code.gson</groupId>
|
<groupId>com.google.code.gson</groupId>
|
||||||
|
|
|
@@ -22,9 +22,12 @@ public class BlacklistAwareClusteringCombiner extends ClusteringCombiner {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(BlacklistAwareClusteringCombiner.class);
|
private static final Log log = LogFactory.getLog(BlacklistAwareClusteringCombiner.class);
|
||||||
|
|
||||||
public static Collection<String> filterAndCombine(final MapDocument a, final Config conf, final Map<String, List<String>> blacklists) {
|
|
||||||
|
|
||||||
final Document filtered = new BlacklistAwareClusteringCombiner().filter(a, blacklists);
|
|
||||||
|
|
||||||
|
public static Collection<String> filterAndCombine(final MapDocument a, final Config conf) {
|
||||||
|
|
||||||
|
final Document filtered = new BlacklistAwareClusteringCombiner().filter(a, conf.blacklists());
|
||||||
return combine(filtered, conf);
|
return combine(filtered, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -1,6 +1,7 @@
|
||||||
package eu.dnetlib.pace.config;
|
package eu.dnetlib.pace.config;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@@ -19,7 +20,7 @@ import eu.dnetlib.pace.model.FieldDef;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
public class DedupConfig implements Config {
|
public class DedupConfig implements Config, Serializable {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(DedupConfig.class);
|
private static final Log log = LogFactory.getLog(DedupConfig.class);
|
||||||
|
|
||||||
|
|
|
@@ -2,10 +2,12 @@ package eu.dnetlib.pace.model;
|
||||||
|
|
||||||
import eu.dnetlib.pace.config.Type;
|
import eu.dnetlib.pace.config.Type;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Interface Field.
|
* The Interface Field.
|
||||||
*/
|
*/
|
||||||
public interface Field extends Iterable<Field> {
|
public interface Field extends Iterable<Field>, Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the name.
|
* Gets the name.
|
||||||
|
|
|
@@ -1,5 +1,6 @@
|
||||||
package eu.dnetlib.pace.model;
|
package eu.dnetlib.pace.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@@ -10,7 +11,7 @@ import com.google.common.collect.Maps;
|
||||||
/**
|
/**
|
||||||
* The Class MapDocument.
|
* The Class MapDocument.
|
||||||
*/
|
*/
|
||||||
public class MapDocument implements Document {
|
public class MapDocument implements Document, Serializable {
|
||||||
|
|
||||||
/** The identifier. */
|
/** The identifier. */
|
||||||
private String identifier;
|
private String identifier;
|
||||||
|
|
Loading…
Reference in New Issue