forked from D-Net/dnet-hadoop
[HostedByMap] download of DOAJ from JSON, modification of test resources, deletion of the class no longer needed for the CSV download
This commit is contained in:
parent
5d608d6291
commit
2c5087d55a
|
@ -23,7 +23,7 @@ public class DownloadCSV {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class);
|
private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class);
|
||||||
|
|
||||||
public static final char DEFAULT_DELIMITER = ';';
|
public static final char DEFAULT_DELIMITER = ',';
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
@ -40,9 +40,6 @@ public class DownloadCSV {
|
||||||
final String fileURL = parser.get("fileURL");
|
final String fileURL = parser.get("fileURL");
|
||||||
log.info("fileURL {}", fileURL);
|
log.info("fileURL {}", fileURL);
|
||||||
|
|
||||||
final String workingPath = parser.get("workingPath");
|
|
||||||
log.info("workingPath {}", workingPath);
|
|
||||||
|
|
||||||
final String outputFile = parser.get("outputFile");
|
final String outputFile = parser.get("outputFile");
|
||||||
log.info("outputFile {}", outputFile);
|
log.info("outputFile {}", outputFile);
|
||||||
|
|
||||||
|
@ -63,31 +60,15 @@ public class DownloadCSV {
|
||||||
|
|
||||||
FileSystem fileSystem = FileSystem.get(conf);
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
new DownloadCSV().doDownload(fileURL, workingPath, outputFile, classForName, delimiter, fileSystem);
|
new DownloadCSV().doDownload(fileURL, outputFile, classForName, delimiter, fileSystem);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName,
|
protected void doDownload(String fileURL, String outputFile, String classForName,
|
||||||
char delimiter, FileSystem fs)
|
char delimiter, FileSystem fs)
|
||||||
throws IOException, ClassNotFoundException, CollectorException {
|
throws IOException, ClassNotFoundException, CollectorException {
|
||||||
|
|
||||||
final HttpConnector2 connector2 = new HttpConnector2();
|
try (InputStreamReader reader = new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) {
|
||||||
|
|
||||||
final Path path = new Path(workingPath + "/replaced.csv");
|
|
||||||
|
|
||||||
try (BufferedReader in = new BufferedReader(
|
|
||||||
new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) {
|
|
||||||
|
|
||||||
try (PrintWriter writer = new PrintWriter(
|
|
||||||
new OutputStreamWriter(fs.create(path, true), StandardCharsets.UTF_8))) {
|
|
||||||
String line;
|
|
||||||
while ((line = in.readLine()) != null) {
|
|
||||||
writer.println(line.replace("\\\"", "\""));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try (InputStreamReader reader = new InputStreamReader(fs.open(path))) {
|
|
||||||
GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter);
|
GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,84 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.hostedbymap;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.common.collection.GetCSV;
|
|
||||||
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
|
||||||
|
|
||||||
public class DownloadCSV2 {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DownloadCSV2.class);
|
|
||||||
|
|
||||||
public static final char DEFAULT_DELIMITER = ';';
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
|
||||||
IOUtils
|
|
||||||
.toString(
|
|
||||||
Objects
|
|
||||||
.requireNonNull(
|
|
||||||
DownloadCSV2.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))));
|
|
||||||
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
final String fileURL = parser.get("fileURL");
|
|
||||||
log.info("fileURL {}", fileURL);
|
|
||||||
|
|
||||||
final String tmpFile = parser.get("tmpFile");
|
|
||||||
log.info("tmpFile {}", tmpFile);
|
|
||||||
|
|
||||||
final String outputFile = parser.get("outputFile");
|
|
||||||
log.info("outputFile {}", outputFile);
|
|
||||||
|
|
||||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
|
||||||
log.info("hdfsNameNode {}", hdfsNameNode);
|
|
||||||
|
|
||||||
final String classForName = parser.get("classForName");
|
|
||||||
log.info("classForName {}", classForName);
|
|
||||||
|
|
||||||
final char delimiter = Optional
|
|
||||||
.ofNullable(parser.get("delimiter"))
|
|
||||||
.map(s -> s.charAt(0))
|
|
||||||
.orElse(DEFAULT_DELIMITER);
|
|
||||||
log.info("delimiter {}", delimiter);
|
|
||||||
|
|
||||||
HttpConnector2 connector2 = new HttpConnector2();
|
|
||||||
|
|
||||||
try (BufferedReader in = new BufferedReader(
|
|
||||||
new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) {
|
|
||||||
|
|
||||||
try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(tmpFile)))) {
|
|
||||||
String line;
|
|
||||||
while ((line = in.readLine()) != null) {
|
|
||||||
writer.println(line.replace("\\\"", "\""));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try (BufferedReader in = new BufferedReader(new FileReader(tmpFile))) {
|
|
||||||
Configuration conf = new Configuration();
|
|
||||||
conf.set("fs.defaultFS", hdfsNameNode);
|
|
||||||
|
|
||||||
FileSystem fileSystem = FileSystem.get(conf);
|
|
||||||
|
|
||||||
GetCSV.getCsv(fileSystem, in, outputFile, classForName, delimiter);
|
|
||||||
} finally {
|
|
||||||
FileUtils.deleteQuietly(new File(tmpFile));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -9,7 +9,6 @@ import java.io.PrintWriter;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.*;
|
import org.apache.hadoop.fs.*;
|
||||||
|
@ -24,6 +23,7 @@ import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel;
|
||||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj.DOAJEntry;
|
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj.DOAJEntry;
|
||||||
|
|
||||||
public class ExtractAndMapDoajJson {
|
public class ExtractAndMapDoajJson {
|
||||||
|
|
|
@ -74,7 +74,9 @@
|
||||||
<decision name="resume_from">
|
<decision name="resume_from">
|
||||||
<switch>
|
<switch>
|
||||||
<case to="produceHBM">${wf:conf('resumeFrom') eq 'ProduceHBM'}</case>
|
<case to="produceHBM">${wf:conf('resumeFrom') eq 'ProduceHBM'}</case>
|
||||||
<case to="remove_hbmpath">${wf:conf('resumeFrom') eq 'download_csv'}</case>
|
<case to="fork_downloads_csv">${wf:conf('resumeFrom') eq 'DownloadBoth'}</case>
|
||||||
|
<case to="downloadGold">${wf:conf('resumeFrom') eq 'DownloadGold'}</case>
|
||||||
|
<case to="downloadDOAJ">${wf:conf('resumeFrom') eq 'DownloadDoaj'}</case>
|
||||||
<default to="prepareInfo"/>
|
<default to="prepareInfo"/>
|
||||||
</switch>
|
</switch>
|
||||||
</decision>
|
</decision>
|
||||||
|
@ -83,18 +85,9 @@
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
<action name="remove_hbmpath">
|
<fork name="fork_downloads_csv">
|
||||||
<fs>
|
|
||||||
<delete path="${hostedByMapPath}"/>
|
|
||||||
<!-- <mkdir path="${hostedByMapPath}"/>-->
|
|
||||||
</fs>
|
|
||||||
<ok to="fork_downloads_csv"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="fork_downloads_csv">
|
|
||||||
<path start="download_gold"/>
|
<path start="download_gold"/>
|
||||||
<path start="download_doaj"/>
|
<path start="download_doaj_json"/>
|
||||||
</fork>
|
</fork>
|
||||||
|
|
||||||
<action name="download_gold">
|
<action name="download_gold">
|
||||||
|
@ -103,21 +96,43 @@
|
||||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--fileURL</arg><arg>${unibiFileURL}</arg>
|
<arg>--fileURL</arg><arg>${unibiFileURL}</arg>
|
||||||
<arg>--tmpFile</arg><arg>/tmp/unibi_gold_replaced.csv</arg>
|
<arg>--tmpFile</arg><arg>/tmp/unibi_gold_replaced.csv</arg>
|
||||||
<arg>--outputFile</arg><arg>${workingDir}/unibi_gold.json</arg>
|
<arg>--outputFile</arg><arg>/user/${wf:user()}/data/unibi_gold.json</arg>
|
||||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel</arg>
|
<arg>--classForName</arg><arg>eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel</arg>
|
||||||
</java>
|
</java>
|
||||||
<ok to="join_download"/>
|
<ok to="join_download"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="download_doaj">
|
<action name="download_doaj_json">
|
||||||
|
<shell xmlns="uri:oozie:shell-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapred.job.queue.name</name>
|
||||||
|
<value>${queueName}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<exec>download.sh</exec>
|
||||||
|
<argument>${doajJsonFileURL}</argument>
|
||||||
|
<argument>${dumpPath}</argument>
|
||||||
|
<argument>${dumpFileName}</argument>
|
||||||
|
<env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
|
||||||
|
<file>download.sh</file>
|
||||||
|
<capture-output/>
|
||||||
|
</shell>
|
||||||
|
<ok to="extractTarGzAndMap"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="extractTarGzAndMap">
|
||||||
<java>
|
<java>
|
||||||
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2</main-class>
|
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.ExtractAndMapDoajJson</main-class>
|
||||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--fileURL</arg><arg>${doajFileURL}</arg>
|
<arg>--compressedFile</arg><arg>${dumpPath}/${dumpFileName}</arg>
|
||||||
<arg>--tmpFile</arg><arg>/tmp/doaj_replaced.csv</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/DOAJ/</arg>
|
||||||
<arg>--outputFile</arg><arg>${workingDir}/doaj.json</arg>
|
<arg>--outputPath</arg><arg>/user/${wf:user()}/data/doaj.json</arg>
|
||||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel</arg>
|
|
||||||
</java>
|
</java>
|
||||||
<ok to="join_download"/>
|
<ok to="join_download"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -125,6 +140,54 @@
|
||||||
|
|
||||||
<join name="join_download" to="produceHBM"/>
|
<join name="join_download" to="produceHBM"/>
|
||||||
|
|
||||||
|
<action name="downloadGold">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV</main-class>
|
||||||
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--fileURL</arg><arg>${unibiFileURL}</arg>
|
||||||
|
<arg>--tmpFile</arg><arg>/tmp/unibi_gold_replaced.csv</arg>
|
||||||
|
<arg>--outputFile</arg><arg>/user/${wf:user()}/data/unibi_gold.json</arg>
|
||||||
|
<arg>--classForName</arg><arg>eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="produceHBM"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="downloadDOAJ">
|
||||||
|
<shell xmlns="uri:oozie:shell-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapred.job.queue.name</name>
|
||||||
|
<value>${queueName}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<exec>download.sh</exec>
|
||||||
|
<argument>${doajJsonFileURL}</argument>
|
||||||
|
<argument>${dumpPath}</argument>
|
||||||
|
<argument>${dumpFileName}</argument>
|
||||||
|
<env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
|
||||||
|
<file>download.sh</file>
|
||||||
|
<capture-output/>
|
||||||
|
</shell>
|
||||||
|
<ok to="extract"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="extract">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.hostedbymap.ExtractAndMapDoajJson</main-class>
|
||||||
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--compressedFile</arg><arg>${dumpPath}/${dumpFileName}</arg>
|
||||||
|
<arg>--workingPath</arg><arg>${workingDir}/DOAJ/</arg>
|
||||||
|
<arg>--outputPath</arg><arg>/user/${wf:user()}/data/doaj.json</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="produceHBM"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
<action name="produceHBM">
|
<action name="produceHBM">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn-cluster</master>
|
<master>yarn-cluster</master>
|
||||||
|
|
|
@ -36,9 +36,7 @@ object SparkProduceHostedByMap {
|
||||||
openaire.journal_id,
|
openaire.journal_id,
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
isOpenAccess,
|
isOpenAccess
|
||||||
-1,
|
|
||||||
List[String]()
|
|
||||||
)
|
)
|
||||||
case Constants.EISSN =>
|
case Constants.EISSN =>
|
||||||
HostedByItemType(
|
HostedByItemType(
|
||||||
|
@ -47,9 +45,7 @@ object SparkProduceHostedByMap {
|
||||||
"",
|
"",
|
||||||
openaire.journal_id,
|
openaire.journal_id,
|
||||||
"",
|
"",
|
||||||
isOpenAccess,
|
isOpenAccess
|
||||||
-1,
|
|
||||||
List[String]()
|
|
||||||
)
|
)
|
||||||
case Constants.ISSNL =>
|
case Constants.ISSNL =>
|
||||||
HostedByItemType(
|
HostedByItemType(
|
||||||
|
@ -58,9 +54,7 @@ object SparkProduceHostedByMap {
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
openaire.journal_id,
|
openaire.journal_id,
|
||||||
isOpenAccess,
|
isOpenAccess
|
||||||
-1,
|
|
||||||
List[String]()
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// catch the default with a variable so you can print it
|
// catch the default with a variable so you can print it
|
||||||
|
@ -85,36 +79,34 @@ object SparkProduceHostedByMap {
|
||||||
issn: String,
|
issn: String,
|
||||||
eissn: String,
|
eissn: String,
|
||||||
issnl: String,
|
issnl: String,
|
||||||
oa: Boolean,
|
oa: Boolean
|
||||||
oaDate: Int,
|
|
||||||
reviewProcess: List[String]
|
|
||||||
): HostedByItemType = {
|
): HostedByItemType = {
|
||||||
if (issn != null) {
|
if (issn != null) {
|
||||||
if (eissn != null) {
|
if (eissn != null) {
|
||||||
if (issnl != null) {
|
if (issnl != null) {
|
||||||
HostedByItemType(id, officialname, issn, eissn, issnl, oa, oaDate, reviewProcess)
|
HostedByItemType(id, officialname, issn, eissn, issnl, oa)
|
||||||
} else {
|
} else {
|
||||||
HostedByItemType(id, officialname, issn, eissn, "", oa, oaDate, reviewProcess)
|
HostedByItemType(id, officialname, issn, eissn, "", oa)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (issnl != null) {
|
if (issnl != null) {
|
||||||
HostedByItemType(id, officialname, issn, "", issnl, oa, oaDate, reviewProcess)
|
HostedByItemType(id, officialname, issn, "", issnl, oa)
|
||||||
} else {
|
} else {
|
||||||
HostedByItemType(id, officialname, issn, "", "", oa, oaDate, reviewProcess)
|
HostedByItemType(id, officialname, issn, "", "", oa)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (eissn != null) {
|
if (eissn != null) {
|
||||||
if (issnl != null) {
|
if (issnl != null) {
|
||||||
HostedByItemType(id, officialname, "", eissn, issnl, oa, oaDate, reviewProcess)
|
HostedByItemType(id, officialname, "", eissn, issnl, oa)
|
||||||
} else {
|
} else {
|
||||||
HostedByItemType(id, officialname, "", eissn, "", oa, oaDate, reviewProcess)
|
HostedByItemType(id, officialname, "", eissn, "", oa)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (issnl != null) {
|
if (issnl != null) {
|
||||||
HostedByItemType(id, officialname, "", "", issnl, oa, oaDate, reviewProcess)
|
HostedByItemType(id, officialname, "", "", issnl, oa)
|
||||||
} else {
|
} else {
|
||||||
HostedByItemType("", "", "", "", "", oa, oaDate, reviewProcess)
|
HostedByItemType("", "", "", "", "", oa)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -129,12 +121,10 @@ object SparkProduceHostedByMap {
|
||||||
dats.getJournal.getIssnPrinted,
|
dats.getJournal.getIssnPrinted,
|
||||||
dats.getJournal.getIssnOnline,
|
dats.getJournal.getIssnOnline,
|
||||||
dats.getJournal.getIssnLinking,
|
dats.getJournal.getIssnLinking,
|
||||||
false,
|
false
|
||||||
-1,
|
|
||||||
List[String]()
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
HostedByItemType("", "", "", "", "", false, -1, List[String]())
|
HostedByItemType("", "", "", "", "", false)
|
||||||
}
|
}
|
||||||
|
|
||||||
def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = {
|
def oaHostedByDataset(spark: SparkSession, datasourcePath: String): Dataset[HostedByItemType] = {
|
||||||
|
@ -160,9 +150,7 @@ object SparkProduceHostedByMap {
|
||||||
gold.getIssn,
|
gold.getIssn,
|
||||||
"",
|
"",
|
||||||
gold.getIssnL,
|
gold.getIssnL,
|
||||||
true,
|
true
|
||||||
-1,
|
|
||||||
List[String]()
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,9 +180,7 @@ object SparkProduceHostedByMap {
|
||||||
doaj.getIssn,
|
doaj.getIssn,
|
||||||
doaj.getEissn,
|
doaj.getEissn,
|
||||||
"",
|
"",
|
||||||
true,
|
true
|
||||||
-1,
|
|
||||||
doaj.getReviewProcess.asScala.toList
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
return getHostedByItemType(
|
return getHostedByItemType(
|
||||||
|
@ -203,9 +189,7 @@ object SparkProduceHostedByMap {
|
||||||
doaj.getIssn,
|
doaj.getIssn,
|
||||||
doaj.getEissn,
|
doaj.getEissn,
|
||||||
"",
|
"",
|
||||||
true,
|
true
|
||||||
doaj.getOaStart,
|
|
||||||
doaj.getReviewProcess.asScala.toList
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,6 @@ public class DownloadCsvTest {
|
||||||
new DownloadCSV()
|
new DownloadCSV()
|
||||||
.doDownload(
|
.doDownload(
|
||||||
fileURL,
|
fileURL,
|
||||||
workingDir + "/unibi_gold",
|
|
||||||
outputFile,
|
outputFile,
|
||||||
UnibiGoldModel.class.getName(),
|
UnibiGoldModel.class.getName(),
|
||||||
',',
|
',',
|
||||||
|
@ -91,56 +90,6 @@ public class DownloadCsvTest {
|
||||||
assertEquals(67028, count);
|
assertEquals(67028, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Disabled
|
|
||||||
@Test
|
|
||||||
void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
|
||||||
|
|
||||||
String fileURL = "https://doaj.org/csv";
|
|
||||||
|
|
||||||
final String outputFile = workingDir + "/doaj.json";
|
|
||||||
new DownloadCSV()
|
|
||||||
.doDownload(
|
|
||||||
fileURL,
|
|
||||||
workingDir + "/doaj",
|
|
||||||
outputFile,
|
|
||||||
DOAJModel.class.getName(),
|
|
||||||
',',
|
|
||||||
fs);
|
|
||||||
|
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile))));
|
|
||||||
|
|
||||||
String line;
|
|
||||||
int count = 0;
|
|
||||||
while ((line = in.readLine()) != null) {
|
|
||||||
DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class);
|
|
||||||
if (count == 0) {
|
|
||||||
assertEquals("0001-3765", doaj.getIssn());
|
|
||||||
assertEquals("1678-2690", doaj.getEissn());
|
|
||||||
assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle());
|
|
||||||
}
|
|
||||||
if (count == 22) {
|
|
||||||
log.info(new ObjectMapper().writeValueAsString(doaj));
|
|
||||||
System.out.println(new ObjectMapper().writeValueAsString(doaj));
|
|
||||||
}
|
|
||||||
if (count == 7904) {
|
|
||||||
// log.info(new ObjectMapper().writeValueAsString(doaj));
|
|
||||||
assertEquals("", doaj.getIssn());
|
|
||||||
assertEquals("2055-7159", doaj.getEissn());
|
|
||||||
assertEquals("BJR|case reports", doaj.getJournalTitle());
|
|
||||||
}
|
|
||||||
if (count == 16707) {
|
|
||||||
|
|
||||||
assertEquals("2783-1043", doaj.getIssn());
|
|
||||||
assertEquals("2783-1051", doaj.getEissn());
|
|
||||||
assertEquals("فیزیک کاربردی ایران", doaj.getJournalTitle());
|
|
||||||
}
|
|
||||||
|
|
||||||
count += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
assertEquals(16715, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterAll
|
@AfterAll
|
||||||
public static void cleanup() {
|
public static void cleanup() {
|
||||||
FileUtils.deleteQuietly(new File(workingDir));
|
FileUtils.deleteQuietly(new File(workingDir));
|
||||||
|
|
|
@ -1,25 +1,25 @@
|
||||||
{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":["Double blind peer review"],"oaStart":2015}
|
||||||
{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":"Blind peer review"}
|
{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":["Blind peer review"],"oaStart":2009}
|
||||||
{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":"Double blind peer review"}
|
{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":["Double blind peer review"],"oaStart":2010}
|
||||||
{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":null,"reviewProcess":["Double blind peer review"],"oaStart":2006}
|
||||||
{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":null,"eissn":"2076-8427","reviewProcess":["Double blind peer review"],"oaStart":2009}
|
||||||
{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":["Double blind peer review"],"oaStart":2008}
|
||||||
{"journalTitle":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","reviewProcess":"Peer review"}
|
{"journalTitle":"Rambam Maimonides Medical Journal","issn":null,"eissn":"2076-9172","reviewProcess":["Peer review"],"oaStart":2010}
|
||||||
{"journalTitle":"Membranes","issn":"2077-0375","eissn":"","reviewProcess":"Blind peer review"}
|
{"journalTitle":"Membranes","issn":"2077-0375","eissn":null,"reviewProcess":["Blind peer review"],"oaStart":2011}
|
||||||
{"journalTitle":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","reviewProcess":"Blind peer review"}
|
{"journalTitle":"Journal of Clinical Medicine","issn":null,"eissn":"2077-0383","reviewProcess":["Blind peer review"],"oaStart":2012}
|
||||||
{"journalTitle":"Agriculture","issn":"","eissn":"2077-0472","reviewProcess":"Blind peer review"}
|
{"journalTitle":"Agriculture","issn":null,"eissn":"2077-0472","reviewProcess":["Blind peer review"],"oaStart":2011}
|
||||||
{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":null,"reviewProcess":["Double blind peer review"],"oaStart":2014}
|
||||||
{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":["Double blind peer review"],"oaStart":2019}
|
||||||
{"journalTitle":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","reviewProcess":"Blind peer review"}
|
{"journalTitle":"Journal of Marine Science and Engineering","issn":null,"eissn":"2077-1312","reviewProcess":["Blind peer review"],"oaStart":2013}
|
||||||
{"journalTitle":"Religions","issn":"","eissn":"2077-1444","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Religions","issn":null,"eissn":"2077-1444","reviewProcess":["Double blind peer review"],"oaStart":2010}
|
||||||
{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":["Double blind peer review"],"oaStart":2010}
|
||||||
{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":"","reviewProcess":"Peer review"}
|
{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":null,"reviewProcess":["Peer review"],"oaStart":2009}
|
||||||
{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":["Double blind peer review"],"oaStart":2010}
|
||||||
{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":["Double blind peer review"],"oaStart":2014}
|
||||||
{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":["Double blind peer review"],"oaStart":2017}
|
||||||
{"journalTitle":"Science Education International","issn":"","eissn":"2077-2327","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Science Education International","issn":null,"eissn":"2077-2327","reviewProcess":["Double blind peer review"],"oaStart":2017}
|
||||||
{"journalTitle":"Edumecentro","issn":"","eissn":"2077-2874","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Edumecentro","issn":null,"eissn":"2077-2874","reviewProcess":["Double blind peer review"],"oaStart":2013}
|
||||||
{"journalTitle":"Monteverdia","issn":"","eissn":"2077-2890","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Monteverdia","issn":null,"eissn":"2077-2890","reviewProcess":["Double blind peer review"],"oaStart":2008}
|
||||||
{"journalTitle":"Transformación","issn":"","eissn":"2077-2955","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Transformación","issn":null,"eissn":"2077-2955","reviewProcess":["Double blind peer review"],"oaStart":2010}
|
||||||
{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":"Double blind peer review"}
|
{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":["Double blind peer review"],"oaStart":2011}
|
||||||
{"journalTitle":"Revue de Primatologie","issn":"","eissn":"2077-3757","reviewProcess":"Peer review"}
|
{"journalTitle":"Revue de Primatologie","issn":null,"eissn":"2077-3757","reviewProcess":["Peer review"],"oaStart":2009}
|
Loading…
Reference in New Issue