forked from D-Net/dnet-hadoop
added code to download gold issn list from unibi
This commit is contained in:
parent 1a5b114906 · commit 63553a76b3
@@ -112,6 +112,16 @@
 			<groupId>eu.dnetlib.dhp</groupId>
 			<artifactId>dhp-schemas</artifactId>
 		</dependency>

+		<dependency>
+			<groupId>org.apache.commons</groupId>
+			<artifactId>commons-csv</artifactId>
+			<version>1.8</version>
+		</dependency>

 	</dependencies>

 </project>
@@ -16,10 +16,15 @@ import org.apache.commons.lang.reflect.FieldUtils;
 public class CSVParser {

     public <R> List<R> parse(String csvFile, String classForName)
+        throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
+        return parse(csvFile, classForName, ';');
+    }
+
+    public <R> List<R> parse(String csvFile, String classForName, char delimiter)
         throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
         final CSVFormat format = CSVFormat.EXCEL
             .withHeader()
-            .withDelimiter(';')
+            .withDelimiter(delimiter)
             .withQuote('"')
             .withTrim();
         List<R> ret = new ArrayList<>();
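The new overload keeps the old behaviour (';' remains the default) and simply threads the caller-supplied delimiter into the Apache Commons CSV format. Below is a minimal, self-contained sketch of the same CSVFormat configuration applied to an in-memory string, using a row taken from the test fixture added later in this commit; the class name DelimiterFormatSketch is illustrative and not part of the patch.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class DelimiterFormatSketch {

    public static void main(String[] args) throws IOException {
        // same settings the patched CSVParser builds, with the delimiter chosen by the caller
        final CSVFormat format = CSVFormat.EXCEL
            .withHeader()
            .withDelimiter(',')
            .withQuote('"')
            .withTrim();

        // two columns from the fixture row "0001-625X" / "Acta Mycologica"
        final String csv = "\"ISSN\",\"TITLE\"\n\"0001-625X\",\"Acta Mycologica\"\n";

        final List<String> titles = new ArrayList<>();
        // fully qualified to avoid confusion with the dnet-hadoop CSVParser wrapper
        for (CSVRecord record : org.apache.commons.csv.CSVParser.parse(csv, format)) {
            titles.add(record.get("TITLE"));
        }
        System.out.println(titles); // prints [Acta Mycologica]
    }
}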
@@ -6,6 +6,7 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.nio.charset.StandardCharsets;
+import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
@@ -29,6 +30,7 @@ public class ReadCSV implements Closeable {
     private final BufferedWriter writer;
     private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
     private final String csvFile;
+    private final char delimiter;

     public static void main(final String[] args) throws Exception {
         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@@ -44,19 +46,23 @@ public class ReadCSV implements Closeable {
         final String hdfsPath = parser.get("hdfsPath");
         final String hdfsNameNode = parser.get("hdfsNameNode");
         final String classForName = parser.get("classForName");
-        try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) {
+        Optional<String> delimiter = Optional.ofNullable(parser.get("delimiter"));
+        char del = ';';
+        if (delimiter.isPresent())
+            del = delimiter.get().charAt(0);
+        try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, del)) {

             log.info("Getting CSV file...");
             readCSV.execute(classForName);

         }

     }

     public void execute(final String classForName) throws Exception {
         CSVParser csvParser = new CSVParser();
         csvParser
-            .parse(csvFile, classForName)
+            .parse(csvFile, classForName, delimiter)
             .stream()
             .forEach(p -> write(p));
@@ -70,7 +76,8 @@ public class ReadCSV implements Closeable {
     public ReadCSV(
         final String hdfsPath,
         final String hdfsNameNode,
-        final String fileURL)
+        final String fileURL,
+        char delimiter)
         throws Exception {
         this.conf = new Configuration();
         this.conf.set("fs.defaultFS", hdfsNameNode);
@@ -85,6 +92,7 @@

         this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
         this.csvFile = httpConnector.getInputSource(fileURL);
+        this.delimiter = delimiter;
     }

     protected void write(final Object p) {
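These hunks only add the delimiter field and constructor parameter; the class already holds an ObjectMapper and a BufferedWriter, so write(p) presumably serialises each parsed bean as one JSON line to the HDFS output stream. A hedged, self-contained sketch of that pattern follows, writing to an in-memory buffer instead of HDFS; JsonLineWriteSketch and ExampleBean are stand-ins, not classes from the commit.

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.StringWriter;

import com.fasterxml.jackson.databind.ObjectMapper;

public class JsonLineWriteSketch {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    // stand-in bean; in the commit the objects are instances of the class named by classForName
    public static class ExampleBean {
        public String issn = "0001-625X";
        public String title = "Acta Mycologica";
    }

    // mirrors the shape of ReadCSV.write(Object p): serialise the bean and append it as one line
    static void write(BufferedWriter writer, Object p) {
        try {
            writer.write(OBJECT_MAPPER.writeValueAsString(p));
            writer.newLine();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public static void main(String[] args) throws IOException {
        StringWriter out = new StringWriter();
        try (BufferedWriter writer = new BufferedWriter(out)) {
            write(writer, new ExampleBean());
        }
        System.out.print(out); // one JSON object per line, e.g. {"issn":"0001-625X","title":"Acta Mycologica"}
    }
}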
@@ -31,7 +31,7 @@ public class ReadExcel implements Closeable {
         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
             IOUtils
                 .toString(
-                    ReadCSV.class
+                    ReadExcel.class
                         .getResourceAsStream(
                             "/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
@@ -28,6 +28,11 @@
         "paramLongName" : "sheetName",
         "paramDescription" : "the name of the sheet in case the file is excel",
         "paramRequired" : false
+    }, {
+        "paramName": "d",
+        "paramLongName" : "delimiter",
+        "paramDescription" : "the delimiter between fields in case it is not ;",
+        "paramRequired" : false
     }
@@ -82,7 +82,12 @@
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-text</artifactId>
     </dependency>
+    <dependency>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <artifactId>dhp-aggregation</artifactId>
+        <version>1.2.4-SNAPSHOT</version>
+        <scope>compile</scope>
+    </dependency>

 </dependencies>
@@ -0,0 +1,37 @@
+
+package eu.dnetlib.doiboost;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+public class GetCSV {
+    private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV.class);
+
+    public static void main(final String[] args) throws Exception {
+        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+            IOUtils
+                .toString(
+                    GetCSV.class
+                        .getResourceAsStream(
+                            "/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json")));
+
+        parser.parseArgument(args);
+
+        final String fileURL = parser.get("fileURL");
+        final String hdfsPath = parser.get("hdfsPath");
+        final String hdfsNameNode = parser.get("hdfsNameNode");
+        final String classForName = parser.get("classForName");
+
+        try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, ',')) {
+
+            log.info("Getting CSV file...");
+            readCSV.execute(classForName);
+
+        }
+    }
+
+}
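Besides the Oozie action defined further down, the same entry point could be driven directly from Java with the long-form options declared in download_unibi_issn_gold_parameters.json. A sketch, assuming GetCSV and its dependencies are on the classpath; the URL and HDFS locations below are placeholders, not values from the commit.

// minimal driver sketch; URL and HDFS locations are placeholders, not values from the commit
public class GetCSVDriverSketch {

    public static void main(String[] args) throws Exception {
        eu.dnetlib.doiboost.GetCSV
            .main(
                new String[] {
                    "--hdfsNameNode", "hdfs://namenode.example.org:8020",
                    "--fileURL", "https://example.org/issn_gold_oa_version_4.csv",
                    "--hdfsPath", "/tmp/unibi_gold/issn_gold_oa.json",
                    "--classForName", "eu.dnetlib.doiboost.UnibiGoldModel"
                });
    }
}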
@@ -0,0 +1,151 @@
+
+package eu.dnetlib.doiboost;
+
+import java.io.Serializable;
+
+public class UnibiGoldModel implements Serializable {
+    private String ISSN;
+    private String ISSN_L;
+    private String ISSN_IN_DOAJ;
+    private String ISSN_IN_ROAD;
+    private String ISSN_IN_PMC;
+    private String ISSN_IN_OAPC;
+    private String ISSN_IN_WOS;
+    private String ISSN_IN_SCOPUS;
+    private String JOURNAL_IN_DOAJ;
+    private String JOURNAL_IN_ROAD;
+    private String JOURNAL_IN_PMC;
+    private String JOURNAL_IN_OAPC;
+    private String JOURNAL_IN_WOS;
+    private String JOURNAL_IN_SCOPUS;
+    private String TITLE;
+    private String TITLE_SOURCE;
+
+    public String getISSN() {
+        return ISSN;
+    }
+
+    public void setISSN(String ISSN) {
+        this.ISSN = ISSN;
+    }
+
+    public String getISSN_L() {
+        return ISSN_L;
+    }
+
+    public void setISSN_L(String ISSN_L) {
+        this.ISSN_L = ISSN_L;
+    }
+
+    public String getISSN_IN_DOAJ() {
+        return ISSN_IN_DOAJ;
+    }
+
+    public void setISSN_IN_DOAJ(String ISSN_IN_DOAJ) {
+        this.ISSN_IN_DOAJ = ISSN_IN_DOAJ;
+    }
+
+    public String getISSN_IN_ROAD() {
+        return ISSN_IN_ROAD;
+    }
+
+    public void setISSN_IN_ROAD(String ISSN_IN_ROAD) {
+        this.ISSN_IN_ROAD = ISSN_IN_ROAD;
+    }
+
+    public String getISSN_IN_PMC() {
+        return ISSN_IN_PMC;
+    }
+
+    public void setISSN_IN_PMC(String ISSN_IN_PMC) {
+        this.ISSN_IN_PMC = ISSN_IN_PMC;
+    }
+
+    public String getISSN_IN_OAPC() {
+        return ISSN_IN_OAPC;
+    }
+
+    public void setISSN_IN_OAPC(String ISSN_IN_OAPC) {
+        this.ISSN_IN_OAPC = ISSN_IN_OAPC;
+    }
+
+    public String getISSN_IN_WOS() {
+        return ISSN_IN_WOS;
+    }
+
+    public void setISSN_IN_WOS(String ISSN_IN_WOS) {
+        this.ISSN_IN_WOS = ISSN_IN_WOS;
+    }
+
+    public String getISSN_IN_SCOPUS() {
+        return ISSN_IN_SCOPUS;
+    }
+
+    public void setISSN_IN_SCOPUS(String ISSN_IN_SCOPUS) {
+        this.ISSN_IN_SCOPUS = ISSN_IN_SCOPUS;
+    }
+
+    public String getJOURNAL_IN_DOAJ() {
+        return JOURNAL_IN_DOAJ;
+    }
+
+    public void setJOURNAL_IN_DOAJ(String JOURNAL_IN_DOAJ) {
+        this.JOURNAL_IN_DOAJ = JOURNAL_IN_DOAJ;
+    }
+
+    public String getJOURNAL_IN_ROAD() {
+        return JOURNAL_IN_ROAD;
+    }
+
+    public void setJOURNAL_IN_ROAD(String JOURNAL_IN_ROAD) {
+        this.JOURNAL_IN_ROAD = JOURNAL_IN_ROAD;
+    }
+
+    public String getJOURNAL_IN_PMC() {
+        return JOURNAL_IN_PMC;
+    }
+
+    public void setJOURNAL_IN_PMC(String JOURNAL_IN_PMC) {
+        this.JOURNAL_IN_PMC = JOURNAL_IN_PMC;
+    }
+
+    public String getJOURNAL_IN_OAPC() {
+        return JOURNAL_IN_OAPC;
+    }
+
+    public void setJOURNAL_IN_OAPC(String JOURNAL_IN_OAPC) {
+        this.JOURNAL_IN_OAPC = JOURNAL_IN_OAPC;
+    }
+
+    public String getJOURNAL_IN_WOS() {
+        return JOURNAL_IN_WOS;
+    }
+
+    public void setJOURNAL_IN_WOS(String JOURNAL_IN_WOS) {
+        this.JOURNAL_IN_WOS = JOURNAL_IN_WOS;
+    }
+
+    public String getJOURNAL_IN_SCOPUS() {
+        return JOURNAL_IN_SCOPUS;
+    }
+
+    public void setJOURNAL_IN_SCOPUS(String JOURNAL_IN_SCOPUS) {
+        this.JOURNAL_IN_SCOPUS = JOURNAL_IN_SCOPUS;
+    }
+
+    public String getTITLE() {
+        return TITLE;
+    }
+
+    public void setTITLE(String TITLE) {
+        this.TITLE = TITLE;
+    }
+
+    public String getTITLE_SOURCE() {
+        return TITLE_SOURCE;
+    }
+
+    public void setTITLE_SOURCE(String TITLE_SOURCE) {
+        this.TITLE_SOURCE = TITLE_SOURCE;
+    }
+}
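The field names deliberately match the CSV header columns (ISSN, ISSN_L, ..., TITLE_SOURCE), which is what lets CSVParser populate the bean by header name. The parse loop itself is not part of this diff; given the FieldUtils import visible in the earlier hunk, it plausibly works along these lines. This is a hedged sketch using commons-lang3 reflection and commons-csv, not the actual implementation.

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.reflect.FieldUtils;

public class HeaderToBeanSketch {

    // assumption: each header column is written onto a same-named field of the target class
    public static <R> List<R> parse(String csv, String classForName, char delimiter) throws Exception {
        final CSVFormat format = CSVFormat.EXCEL
            .withHeader()
            .withDelimiter(delimiter)
            .withQuote('"')
            .withTrim();

        final List<R> ret = new ArrayList<>();
        for (CSVRecord record : org.apache.commons.csv.CSVParser.parse(csv, format)) {
            @SuppressWarnings("unchecked")
            final R bean = (R) Class.forName(classForName).newInstance();
            for (String header : record.toMap().keySet()) {
                FieldUtils.writeField(bean, header, record.get(header), true);
            }
            ret.add(bean);
        }
        return ret;
    }

    public static void main(String[] args) throws Exception {
        // assumes the commit's UnibiGoldModel is on the classpath; the row comes from the test fixture
        String csv = "\"ISSN\",\"TITLE\"\n\"0001-625X\",\"Acta Mycologica\"\n";
        System.out.println(parse(csv, "eu.dnetlib.doiboost.UnibiGoldModel", ',').size()); // prints 1
    }
}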
@@ -0,0 +1,39 @@
+[
+
+    {
+        "paramName": "fu",
+        "paramLongName" : "fileURL",
+        "paramDescription" : "the url of the file to download",
+        "paramRequired" : true
+    },
+    {
+        "paramName": "hp",
+        "paramLongName" : "hdfsPath",
+        "paramDescription" : "where to save the file",
+        "paramRequired" : true
+    },
+    {
+        "paramName": "hnn",
+        "paramLongName" : "hdfsNameNode",
+        "paramDescription" : "the name node",
+        "paramRequired" : true
+    },
+    {
+        "paramName": "cfn",
+        "paramLongName" : "classForName",
+        "paramDescription" : "the name of the class to deserialize the csv to",
+        "paramRequired" : true
+    }, {
+        "paramName": "sn",
+        "paramLongName" : "sheetName",
+        "paramDescription" : "the name of the sheet in case the file is excel",
+        "paramRequired" : false
+    }, {
+        "paramName": "d",
+        "paramLongName" : "delimiter",
+        "paramDescription" : "the delimiter between fields in case it is not ;",
+        "paramRequired" : false
+    }
+
+]
@@ -63,6 +63,7 @@
     <decision name="resume_from">
         <switch>
+            <case to="DownloadGoldIssn">${wf:conf('resumeFrom') eq 'DownloadGoldIssn'}</case>
             <case to="UnpackCrossrefEntries">${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'}</case>
             <case to="GenerateCrossrefDataset">${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'}</case>
             <case to="ResetMagWorkingPath">${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}</case>
@@ -76,6 +77,19 @@
         <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
     </kill>

+    <action name="DownloadGoldIssn">
+        <java>
+            <main-class>eu.dnetlib.doiboost.GetCSV</main-class>
+            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
+            <arg>--fileURL</arg><arg>${unibiGoldIssnFileURL}</arg>
+            <arg>--hdfsPath</arg><arg>${hdfsPath}</arg>
+            <arg>--classForName</arg><arg>eu.dnetlib.doiboost.UnibiGoldModel</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
     <action name="ImportCrossRef">
         <java>
             <job-tracker>${jobTracker}</job-tracker>
@@ -0,0 +1,43 @@
+
+package eu.dnetlib.dhp.doiboost;
+
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser;
+
+public class GetCSVTest {
+
+    @Test
+    public void readUnibiGoldTest() throws Exception {
+
+        String programmecsv = IOUtils
+            .toString(
+                getClass()
+                    .getClassLoader()
+                    .getResourceAsStream("eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv"));
+
+        CSVParser csvParser = new CSVParser();
+
+        List<Object> pl = csvParser.parse(programmecsv, "eu.dnetlib.doiboost.UnibiGoldModel", ',');
+
+        Assertions.assertEquals(72, pl.size());
+
+//        ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+//
+//        pl.forEach(res -> {
+//            try {
+//                System.out.println(OBJECT_MAPPER.writeValueAsString(res));
+//            } catch (JsonProcessingException e) {
+//                e.printStackTrace();
+//            }
+//        });
+
+    }
+}
@@ -0,0 +1,73 @@
+"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE"
+"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ"
+"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ"
+"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ"
+"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD"
+"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD"
+"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ"
+"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ"
+"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD"
+"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ"
+"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD"
+"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD"
+"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ"
+"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ"
+"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD"
+"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ"
+"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ"
+"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD"
+"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD"
+"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ"
+"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ"
+"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD"
+"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ"
+"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD"
+"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ"
+"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ"
+"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ"
+"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD"
+"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ"
+"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD"
+"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD"
+"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ"
+"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ"
+"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD"
+"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ"
+"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD"
+"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD"
+"0079-4929","0079-4929",1,1,0,0,0,0,1,1,0,0,0,0,"Právněhistorické studie","DOAJ"
+"0100-3283","0100-3283",0,1,0,0,0,0,0,1,0,0,0,0,"Hansenologia Internationalis.","ROAD"
+"0100-4042","0100-4042",1,1,0,0,1,1,1,1,0,0,1,1,"Química Nova","DOAJ"
+"0100-8692","0100-8692",1,1,0,0,1,0,1,1,0,0,1,1,"Arquivos Brasileiros de Psicologia ","DOAJ"
+"0102-4469","0102-4469",1,0,0,0,0,0,1,0,0,0,0,0,"Perspectiva Teológica","DOAJ"
+"0102-6992","0102-6992",1,1,0,0,0,1,1,1,0,0,0,1,"Sociedade e Estado","DOAJ"
+"0103-1570","0103-1570",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Sociedade & Natureza","DOAJ"
+"0103-2070","0103-2070",1,1,0,0,1,1,1,1,0,0,1,1,"Tempo Social","DOAJ"
+"0104-0588","0104-0588",1,1,0,0,1,0,1,1,0,0,1,0,"Revista de Estudos da Linguagem","DOAJ"
+"0104-6497","0104-6497",1,1,0,0,1,0,1,1,0,0,1,0,"Nauplius","DOAJ"
+"0104-8929","0104-8929",0,1,0,0,0,0,0,1,0,0,0,0,"Saeculum.","ROAD"
+"0104-9496","0104-9496",1,0,0,0,0,0,1,0,0,0,0,0,"Revista do Direito","DOAJ"
+"0120-0380","0120-0380",0,1,0,0,1,0,0,1,0,0,1,0,"Boletín de matemáticas.","ROAD"
+"0120-100X","0120-100X",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Ion","DOAJ"
+"0120-4807","0120-4807",1,1,0,0,0,0,1,1,0,0,0,0,"Universitas Humanística","DOAJ"
+"0121-4004","0121-4004",1,0,0,0,1,1,1,0,0,0,1,1,"Vitae","DOAJ"
+"0121-4500","0121-4500",1,1,0,0,0,0,1,1,0,0,0,0,"Avances en Enfermería","DOAJ"
+"0121-8697","0121-8697",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Derecho","DOAJ"
+"0122-5197","0122-5197",0,1,0,0,0,1,0,1,0,0,0,1,"Memoria y Sociedad.","ROAD"
+"0161-0457","0161-0457",1,0,1,1,1,1,1,0,1,1,1,1,"Scanning","DOAJ"
+"0215-4706","0215-4706",1,1,0,0,0,0,1,1,0,0,0,0,"Floribunda.","ROAD"
+"0324-6000","0324-6000",0,1,0,0,1,1,0,1,0,0,1,1,"Periodica polytechnica. Electrical engineering","ROAD"
+"0325-187X","0325-187X",1,1,0,0,0,1,1,1,0,0,0,1,"Meteorologica","DOAJ"
+"0326-7237","0326-7237",1,1,0,0,0,1,1,1,0,0,0,1,"Geoacta.","ROAD"
+"0327-1676","0327-1676",0,1,0,0,0,0,1,1,0,0,0,0,"Andes","DOAJ"
+"0327-2818","0327-2818",1,0,0,0,0,0,1,0,0,0,0,0,"Dominguezia","DOAJ"
+"0327-5108","0327-5108",1,0,0,0,0,0,1,0,0,0,0,0,"Páginas de Filosofía","DOAJ"
+"0327-585X","0327-585X",1,1,0,0,0,0,1,1,0,0,0,0,"Actualidad Económica","DOAJ"
+"0327-6147","0327-6147",0,1,0,0,0,0,0,1,0,0,0,0,"Papeles de trabajo.","ROAD"
+"0327-7763","0327-7763",0,1,0,0,0,0,0,1,0,0,0,0,"Revista del IICE.","ROAD"
+"0327-9286","0327-9286",1,1,0,0,0,0,1,1,0,0,0,0,"Acta Toxicológica Argentina","DOAJ"
+"0328-1205","0328-1205",1,1,0,0,1,1,1,1,0,0,1,1,"Synthesis (La Plata)","DOAJ"
+"0329-5893","0329-5893",0,1,0,0,0,0,0,1,0,0,0,0,"Investigaciones en psicología..","ROAD"
+"0329-8213","0329-8213",1,0,0,0,0,0,1,0,0,0,0,0,"Historia Regional","DOAJ"
+"0332-5024","0332-5024",1,1,0,0,0,0,1,1,0,0,0,0,"Studia Musicologica Norvegica","DOAJ"
+"0350-185X","0350-185X",1,1,0,0,0,0,1,1,0,0,0,0,"Južnoslovenski Filolog","DOAJ"
@@ -16,7 +16,6 @@ import javax.xml.transform.*;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;

-import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.util.LongAccumulator;
 import org.dom4j.Document;
@@ -43,6 +42,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;

 public class XmlRecordFactory implements Serializable {