[dumpCSV] adding double quotes enclosing all the fields

This commit is contained in:
Miriam Baglioni 2023-07-17 16:21:20 +02:00
parent b01573e201
commit 5ff50d115a
8 changed files with 59 additions and 32 deletions

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv;
import org.apache.commons.lang.StringUtils;
import java.io.Serializable;
/**
@ -9,4 +11,10 @@ import java.io.Serializable;
*/
public class Constants implements Serializable {

	/** A single tab character; presumably the field separator of the CSV dump (confirm against the writers). */
	public static final String SEP = "\t";

	/**
	 * Encloses a value in double quotes.
	 *
	 * <p>Null and empty values are returned unchanged. Quotes already present in the value are
	 * neither escaped nor detected, so an already-quoted value receives a second pair.
	 *
	 * @param id the value to enclose; may be null or empty
	 * @return the value surrounded by double quotes, or the argument itself when it is null or empty
	 */
	public static final String addQuotes(String id) {
		if (StringUtils.isEmpty(id)) {
			return id;
		}
		return "\"".concat(id).concat("\"");
	}
}

View File

@ -290,13 +290,13 @@ public class SparkDumpResults implements Serializable {
private static <R extends Result> CSVResult mapResultInfo(R r) {
CSVResult ret = new CSVResult();
ret.setId(r.getId());
ret.setType(r.getResulttype().getClassid());
ret.setId(removeBreaks(r.getId()));
ret.setType(removeBreaks(r.getResulttype().getClassid()));
ret.setTitle(getTitle(r.getTitle()));
ret.setDescription(getAbstract(r.getDescription()));
ret.setAccessright(r.getBestaccessright().getClassid());
ret.setPublication_date(getFieldValue(r.getDateofacceptance()));
ret.setPublisher(getFieldValue(r.getPublisher()));
ret.setAccessright(removeBreaks(r.getBestaccessright().getClassid()));
ret.setPublication_date(removeBreaks(getFieldValue(r.getDateofacceptance())));
ret.setPublisher(removeBreaks(getFieldValue(r.getPublisher())));
if (Optional.ofNullable(r.getSubject()).isPresent())
ret.setKeywords(String.join(", ", r.getSubject().stream().map(s -> {
@ -354,7 +354,11 @@ public class SparkDumpResults implements Serializable {
}
/**
 * Replaces line feeds, tabs, carriage returns and double quotes with a single space each.
 *
 * <p>A backslash-escaped quote (backslash followed by a double quote) is collapsed into one space
 * as a unit; any remaining bare double quote is then replaced on its own.
 *
 * @param input the text to clean; may be null
 * @return the cleaned text, or null when {@code input} is null
 */
private static String removeBreaks(String input) {
	// Null passes through instead of throwing, matching Constants.addQuotes.
	if (input == null) {
		return null;
	}
	return input
		.replace("\n", " ")
		.replace("\t", " ")
		.replace("\r", " ")
		// Must run before the bare-quote replacement so that \" becomes one space, not "\ ".
		.replace("\\\"", " ")
		.replace("\"", " ");
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
@ -28,7 +30,7 @@ public class CSVAuthor implements Serializable {
}
/**
 * Stores the id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param id the raw id
 */
public void setId(String id) {
	this.id = Constants.addQuotes(id);
}
public String getFirstname() {
@ -36,7 +38,7 @@ public class CSVAuthor implements Serializable {
}
/**
 * Stores the first name enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param firstname the raw first name
 */
public void setFirstname(String firstname) {
	this.firstname = Constants.addQuotes(firstname);
}
public String getLastname() {
@ -44,7 +46,7 @@ public class CSVAuthor implements Serializable {
}
/**
 * Stores the last name enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param lastname the raw last name
 */
public void setLastname(String lastname) {
	this.lastname = Constants.addQuotes(lastname);
}
public String getFullname() {
@ -52,7 +54,7 @@ public class CSVAuthor implements Serializable {
}
/**
 * Stores the full name enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param fullname the raw full name
 */
public void setFullname(String fullname) {
	this.fullname = Constants.addQuotes(fullname);
}
public String getOrcid() {
@ -60,7 +62,7 @@ public class CSVAuthor implements Serializable {
}
/**
 * Stores the ORCID enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param orcid the raw ORCID value
 */
public void setOrcid(String orcid) {
	this.orcid = Constants.addQuotes(orcid);
}
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
@ -17,7 +19,7 @@ public class CSVCitation implements Serializable {
}
/**
 * Stores the id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param id the raw id
 */
public void setId(String id) {
	this.id = Constants.addQuotes(id);
}
public String getResult_id_cites() {
@ -25,7 +27,7 @@ public class CSVCitation implements Serializable {
}
/**
 * Stores the {@code result_id_cites} value enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param result_id_cites the raw value
 */
public void setResult_id_cites(String result_id_cites) {
	this.result_id_cites = Constants.addQuotes(result_id_cites);
}
public String getResult_id_cited() {
@ -33,6 +35,6 @@ public class CSVCitation implements Serializable {
}
/**
 * Stores the {@code result_id_cited} value enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param result_id_cited the raw value
 */
public void setResult_id_cited(String result_id_cited) {
	this.result_id_cited = Constants.addQuotes(result_id_cited);
}
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
@ -19,7 +21,7 @@ public class CSVPid implements Serializable {
}
/**
 * Stores the result id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param result_id the raw result id
 */
public void setResult_id(String result_id) {
	this.result_id = Constants.addQuotes(result_id);
}
public String getPid() {
@ -27,7 +29,7 @@ public class CSVPid implements Serializable {
}
/**
 * Stores the pid enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param pid the raw pid
 */
public void setPid(String pid) {
	this.pid = Constants.addQuotes(pid);
}
public String getType() {
@ -35,7 +37,7 @@ public class CSVPid implements Serializable {
}
/**
 * Stores the type enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param type the raw type
 */
public void setType(String type) {
	this.type = Constants.addQuotes(type);
}
public String getId() {
@ -43,6 +45,6 @@ public class CSVPid implements Serializable {
}
/**
 * Stores the id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param id the raw id
 */
public void setId(String id) {
	this.id = Constants.addQuotes(id);
}
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
@ -16,7 +18,7 @@ public class CSVRELCommunityResult implements Serializable {
}
/**
 * Stores the result id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param result_id the raw result id
 */
public void setResult_id(String result_id) {
	this.result_id = Constants.addQuotes(result_id);
}
public String getCommunity_id() {
@ -24,6 +26,6 @@ public class CSVRELCommunityResult implements Serializable {
}
/**
 * Stores the community id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param community_id the raw community id
 */
public void setCommunity_id(String community_id) {
	this.community_id = Constants.addQuotes(community_id);
}
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable;
/**
@ -16,7 +18,7 @@ public class CSVRelResAut implements Serializable {
}
/**
 * Stores the result id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param result_id the raw result id
 */
public void setResult_id(String result_id) {
	this.result_id = Constants.addQuotes(result_id);
}
public String getAuthor_id() {
@ -24,6 +26,6 @@ public class CSVRelResAut implements Serializable {
}
/**
 * Stores the author id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param author_id the raw author id
 */
public void setAuthor_id(String author_id) {
	this.author_id = Constants.addQuotes(author_id);
}
}

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import java.io.Serializable;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import org.apache.commons.lang.StringUtils;
import com.fasterxml.jackson.annotation.JsonGetter;
@ -11,6 +12,7 @@ import com.fasterxml.jackson.annotation.JsonSetter;
import eu.dnetlib.dhp.schema.oaf.Country;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import sun.swing.StringUIClientPropertyKey;
/**
* @author miriam.baglioni
@ -33,15 +35,16 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the id enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param id the raw id
 */
public void setId(String id) {
	this.id = Constants.addQuotes(id);
}
/**
 * Returns the type exactly as held by this object.
 *
 * @return the stored type; may be null if it was never set
 */
public String getType() {
	return this.type;
}
/**
 * Stores the type enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param type the raw type
 */
public void setType(String type) {
	this.type = Constants.addQuotes(type);
}
public String getTitle() {
@ -49,7 +52,8 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the title enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param title the raw title
 */
public void setTitle(String title) {
	this.title = Constants.addQuotes(title);
}
public String getDescription() {
@ -57,7 +61,8 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the description enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param description the raw description
 */
public void setDescription(String description) {
	this.description = Constants.addQuotes(description);
}
public String getAccessright() {
@ -65,7 +70,7 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the access right enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param accessright the raw access right
 */
public void setAccessright(String accessright) {
	this.accessright = Constants.addQuotes(accessright);
}
public String getPublication_date() {
@ -73,7 +78,7 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the publication date enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param publication_date the raw publication date
 */
public void setPublication_date(String publication_date) {
	this.publication_date = Constants.addQuotes(publication_date);
}
public String getPublisher() {
@ -81,7 +86,7 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the publisher enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param publisher the raw publisher
 */
public void setPublisher(String publisher) {
	this.publisher = Constants.addQuotes(publisher);
}
public String getKeywords() {
@ -89,7 +94,7 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the keywords enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param keywords the raw keywords string
 */
public void setKeywords(String keywords) {
	this.keywords = Constants.addQuotes(keywords);
}
public String getCountry() {
@ -97,7 +102,7 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the country enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param country the raw country
 */
public void setCountry(String country) {
	this.country = Constants.addQuotes(country);
}
public String getLanguage() {
@ -105,7 +110,7 @@ public class CSVResult implements Serializable {
}
/**
 * Stores the language enclosed in double quotes (see {@link Constants#addQuotes(String)}).
 * Null and empty values are stored unchanged. Pass the raw, unquoted value: quoting is applied on every call.
 *
 * @param language the raw language
 */
public void setLanguage(String language) {
	this.language = Constants.addQuotes(language);
}
}