forked from D-Net/dnet-hadoop
[FoS] Changed the default delimiter from comma to tab to fix the incorrect splitting of subject values
This commit is contained in:
parent
5f0906be60
commit
7473093c84
|
@ -21,6 +21,7 @@ public class Constants {
|
|||
public static final String DOI_CLASSNAME = "Digital Object Identifier";
|
||||
|
||||
public static final String DEFAULT_DELIMITER = ",";
|
||||
+ public static final String DEFAULT_FOS_DELIMITER = "\t";
|
||||
|
||||
public static final String UPDATE_DATA_INFO_TYPE = "update";
|
||||
public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos";
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||
|
||||
- import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_DELIMITER;
|
||||
+ import static eu.dnetlib.dhp.actionmanager.Constants.DEFAULT_FOS_DELIMITER;
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.isSparkSessionManaged;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
|
@ -9,8 +9,7 @@ import java.io.Serializable;
|
|||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
|
@ -49,7 +48,7 @@ public class GetFOSSparkJob implements Serializable {
|
|||
|
||||
final String delimiter = Optional
|
||||
.ofNullable(parser.get("delimiter"))
|
||||
- .orElse(DEFAULT_DELIMITER);
|
||||
+ .orElse(DEFAULT_FOS_DELIMITER);
|
||||
|
||||
SparkConf sconf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
|
|
Loading…
Reference in New Issue