[dumpCSV] adding double quotes enclosing all the fields

This commit is contained in:
Miriam Baglioni 2023-07-17 16:21:20 +02:00
parent b01573e201
commit 5ff50d115a
8 changed files with 59 additions and 32 deletions

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv; package eu.dnetlib.dhp.oa.graph.dump.csv;
import org.apache.commons.lang.StringUtils;
import java.io.Serializable; import java.io.Serializable;
/** /**
@ -9,4 +11,10 @@ import java.io.Serializable;
*/ */
public class Constants implements Serializable { public class Constants implements Serializable {
public final static String SEP = "\t"; public final static String SEP = "\t";
/**
 * Encloses a value in double quotes.
 *
 * @param id the value to quote; may be {@code null}
 * @return {@code id} surrounded by double quotes, or {@code id} unchanged
 *         when it is {@code null} or the empty string
 */
public static final String addQuotes(String id) {
    if (StringUtils.isEmpty(id)) {
        // Nothing to wrap: null and "" are handed back as they came in.
        return id;
    }
    final String quote = "\"";
    return quote + id + quote;
}
} }

View File

@ -290,13 +290,13 @@ public class SparkDumpResults implements Serializable {
private static <R extends Result> CSVResult mapResultInfo(R r) { private static <R extends Result> CSVResult mapResultInfo(R r) {
CSVResult ret = new CSVResult(); CSVResult ret = new CSVResult();
ret.setId(r.getId()); ret.setId(removeBreaks(r.getId()));
ret.setType(r.getResulttype().getClassid()); ret.setType(removeBreaks(r.getResulttype().getClassid()));
ret.setTitle(getTitle(r.getTitle())); ret.setTitle(getTitle(r.getTitle()));
ret.setDescription(getAbstract(r.getDescription())); ret.setDescription(getAbstract(r.getDescription()));
ret.setAccessright(r.getBestaccessright().getClassid()); ret.setAccessright(removeBreaks(r.getBestaccessright().getClassid()));
ret.setPublication_date(getFieldValue(r.getDateofacceptance())); ret.setPublication_date(removeBreaks(getFieldValue(r.getDateofacceptance())));
ret.setPublisher(getFieldValue(r.getPublisher())); ret.setPublisher(removeBreaks(getFieldValue(r.getPublisher())));
if (Optional.ofNullable(r.getSubject()).isPresent()) if (Optional.ofNullable(r.getSubject()).isPresent())
ret.setKeywords(String.join(", ", r.getSubject().stream().map(s -> { ret.setKeywords(String.join(", ", r.getSubject().stream().map(s -> {
@ -354,7 +354,11 @@ public class SparkDumpResults implements Serializable {
} }
private static String removeBreaks(String input) { private static String removeBreaks(String input) {
return input.replace("\n", " ").replace("\t", " ").replace("\r", " "); return input.replace("\n", " ").replace("\t", " ")
.replace("\r", " ")
.replace("\\\"", " ")
.replace("\"", " ")
;
} }

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model; package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable; import java.io.Serializable;
/** /**
@ -28,7 +30,7 @@ public class CSVAuthor implements Serializable {
} }
public void setId(String id) { public void setId(String id) {
this.id = id; this.id = Constants.addQuotes(id);
} }
public String getFirstname() { public String getFirstname() {
@ -36,7 +38,7 @@ public class CSVAuthor implements Serializable {
} }
public void setFirstname(String firstname) { public void setFirstname(String firstname) {
this.firstname = firstname; this.firstname = Constants.addQuotes(firstname);
} }
public String getLastname() { public String getLastname() {
@ -44,7 +46,7 @@ public class CSVAuthor implements Serializable {
} }
public void setLastname(String lastname) { public void setLastname(String lastname) {
this.lastname = lastname; this.lastname = Constants.addQuotes(lastname);
} }
public String getFullname() { public String getFullname() {
@ -52,7 +54,7 @@ public class CSVAuthor implements Serializable {
} }
public void setFullname(String fullname) { public void setFullname(String fullname) {
this.fullname = fullname; this.fullname = Constants.addQuotes(fullname);
} }
public String getOrcid() { public String getOrcid() {
@ -60,7 +62,7 @@ public class CSVAuthor implements Serializable {
} }
public void setOrcid(String orcid) { public void setOrcid(String orcid) {
this.orcid = orcid; this.orcid = Constants.addQuotes(orcid);
} }
} }

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model; package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable; import java.io.Serializable;
/** /**
@ -17,7 +19,7 @@ public class CSVCitation implements Serializable {
} }
public void setId(String id) { public void setId(String id) {
this.id = id; this.id = Constants.addQuotes(id);
} }
public String getResult_id_cites() { public String getResult_id_cites() {
@ -25,7 +27,7 @@ public class CSVCitation implements Serializable {
} }
public void setResult_id_cites(String result_id_cites) { public void setResult_id_cites(String result_id_cites) {
this.result_id_cites = result_id_cites; this.result_id_cites = Constants.addQuotes(result_id_cites);
} }
public String getResult_id_cited() { public String getResult_id_cited() {
@ -33,6 +35,6 @@ public class CSVCitation implements Serializable {
} }
public void setResult_id_cited(String result_id_cited) { public void setResult_id_cited(String result_id_cited) {
this.result_id_cited = result_id_cited; this.result_id_cited = Constants.addQuotes(result_id_cited);
} }
} }

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model; package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable; import java.io.Serializable;
/** /**
@ -19,7 +21,7 @@ public class CSVPid implements Serializable {
} }
public void setResult_id(String result_id) { public void setResult_id(String result_id) {
this.result_id = result_id; this.result_id = Constants.addQuotes(result_id);
} }
public String getPid() { public String getPid() {
@ -27,7 +29,7 @@ public class CSVPid implements Serializable {
} }
public void setPid(String pid) { public void setPid(String pid) {
this.pid = pid; this.pid = Constants.addQuotes(pid);
} }
public String getType() { public String getType() {
@ -35,7 +37,7 @@ public class CSVPid implements Serializable {
} }
public void setType(String type) { public void setType(String type) {
this.type = type; this.type = Constants.addQuotes(type);
} }
public String getId() { public String getId() {
@ -43,6 +45,6 @@ public class CSVPid implements Serializable {
} }
public void setId(String id) { public void setId(String id) {
this.id = id; this.id = Constants.addQuotes(id);
} }
} }

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model; package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable; import java.io.Serializable;
/** /**
@ -16,7 +18,7 @@ public class CSVRELCommunityResult implements Serializable {
} }
public void setResult_id(String result_id) { public void setResult_id(String result_id) {
this.result_id = result_id; this.result_id = Constants.addQuotes(result_id);
} }
public String getCommunity_id() { public String getCommunity_id() {
@ -24,6 +26,6 @@ public class CSVRELCommunityResult implements Serializable {
} }
public void setCommunity_id(String community_id) { public void setCommunity_id(String community_id) {
this.community_id = community_id; this.community_id = Constants.addQuotes(community_id);
} }
} }

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump.csv.model; package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import java.io.Serializable; import java.io.Serializable;
/** /**
@ -16,7 +18,7 @@ public class CSVRelResAut implements Serializable {
} }
public void setResult_id(String result_id) { public void setResult_id(String result_id) {
this.result_id = result_id; this.result_id = Constants.addQuotes(result_id);
} }
public String getAuthor_id() { public String getAuthor_id() {
@ -24,6 +26,6 @@ public class CSVRelResAut implements Serializable {
} }
public void setAuthor_id(String author_id) { public void setAuthor_id(String author_id) {
this.author_id = author_id; this.author_id = Constants.addQuotes(author_id);
} }
} }

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.dump.csv.model;
import java.io.Serializable; import java.io.Serializable;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import com.fasterxml.jackson.annotation.JsonGetter; import com.fasterxml.jackson.annotation.JsonGetter;
@ -11,6 +12,7 @@ import com.fasterxml.jackson.annotation.JsonSetter;
import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Country;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import sun.swing.StringUIClientPropertyKey;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
@ -33,15 +35,16 @@ public class CSVResult implements Serializable {
} }
public void setId(String id) { public void setId(String id) {
this.id = id; this.id = Constants.addQuotes(id);
} }
public String getType() { public String getType() {
return type; return type;
} }
public void setType(String type) { public void setType(String type) {
this.type = type; this.type = Constants.addQuotes(type);
} }
public String getTitle() { public String getTitle() {
@ -49,7 +52,8 @@ public class CSVResult implements Serializable {
} }
public void setTitle(String title) { public void setTitle(String title) {
this.title = title; this.title =
Constants.addQuotes(title);
} }
public String getDescription() { public String getDescription() {
@ -57,7 +61,8 @@ public class CSVResult implements Serializable {
} }
public void setDescription(String description) { public void setDescription(String description) {
this.description = description; this.description =
Constants.addQuotes(description);
} }
public String getAccessright() { public String getAccessright() {
@ -65,7 +70,7 @@ public class CSVResult implements Serializable {
} }
public void setAccessright(String accessright) { public void setAccessright(String accessright) {
this.accessright = accessright; this.accessright = Constants.addQuotes(accessright);
} }
public String getPublication_date() { public String getPublication_date() {
@ -73,7 +78,7 @@ public class CSVResult implements Serializable {
} }
public void setPublication_date(String publication_date) { public void setPublication_date(String publication_date) {
this.publication_date = publication_date; this.publication_date = Constants.addQuotes(publication_date);
} }
public String getPublisher() { public String getPublisher() {
@ -81,7 +86,7 @@ public class CSVResult implements Serializable {
} }
public void setPublisher(String publisher) { public void setPublisher(String publisher) {
this.publisher = publisher; this.publisher = Constants.addQuotes(publisher);
} }
public String getKeywords() { public String getKeywords() {
@ -89,7 +94,7 @@ public class CSVResult implements Serializable {
} }
public void setKeywords(String keywords) { public void setKeywords(String keywords) {
this.keywords = keywords; this.keywords = Constants.addQuotes(keywords);
} }
public String getCountry() { public String getCountry() {
@ -97,7 +102,7 @@ public class CSVResult implements Serializable {
} }
public void setCountry(String country) { public void setCountry(String country) {
this.country = country; this.country = Constants.addQuotes(country);
} }
public String getLanguage() { public String getLanguage() {
@ -105,7 +110,7 @@ public class CSVResult implements Serializable {
} }
public void setLanguage(String language) { public void setLanguage(String language) {
this.language = language; this.language = Constants.addQuotes(language);
} }
} }