diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java
new file mode 100644
index 0000000000..49126984cf
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java
@@ -0,0 +1,69 @@
+package eu.dnetlib.dhp.common.collection;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.util.zip.GZIPOutputStream;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Unpacks a {@code .tar.gz} archive read through a Hadoop {@link FileSystem}, storing every regular
+ * entry as its own gzip-compressed file.
+ */
+public class DecompressTarGz {
+
+    /**
+     * Extracts the archive at {@code tarGzPath} and writes each non-directory entry to
+     * {@code outputPath + entryName + ".gz"}.
+     *
+     * @param fs the file system used both to read the archive and to create the output files
+     * @param outputPath prefix of every output file; it is concatenated with the entry name as-is, so the
+     *        caller supplies the trailing separator when one is wanted
+     * @param tarGzPath location of the {@code .tar.gz} archive to read
+     * @throws IOException if the archive cannot be read, an output file cannot be written, or an entry
+     *         name contains a {@code ..} segment
+     */
+    public static void doExtract(FileSystem fs, String outputPath, String tarGzPath) throws IOException {
+
+        // the raw input is a resource of its own so it is closed even when wrapping it fails
+        try (
+            FSDataInputStream inputFileStream = fs.open(new Path(tarGzPath));
+            TarArchiveInputStream tais = new TarArchiveInputStream(
+                new GzipCompressorInputStream(inputFileStream))) {
+            TarArchiveEntry entry;
+            while ((entry = tais.getNextTarEntry()) != null) {
+                if (entry.isDirectory()) {
+                    continue;
+                }
+                final String entryName = entry.getName();
+                if (hasParentSegment(entryName)) {
+                    throw new IOException("Refusing to extract tar entry with a '..' segment: " + entryName);
+                }
+                try (
+                    FSDataOutputStream out = fs
+                        .create(new Path(outputPath.concat(entryName).concat(".gz")));
+                    GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
+                    // copies the bytes of the entry the tar stream is currently positioned on
+                    IOUtils.copy(tais, gzipOs);
+                }
+            }
+        }
+    }
+
+    /** Tells whether {@code entryName} has a {@code ..} path segment, i.e. could point outside the output prefix. */
+    private static boolean hasParentSegment(String entryName) {
+        for (String segment : entryName.split("[/\\\\]")) {
+            if ("..".equals(segment)) {
+                return true;
+            }
+        }
+        return false;
+    }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java
index d1861ff0a7..6060b619ef 100644
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java
+++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -1,19 +1,13 @@ package eu.dnetlib.doiboost.crossref; -import java.io.BufferedOutputStream; -import java.net.URI; -import java.util.zip.GZIPOutputStream; +import static eu.dnetlib.dhp.common.collection.DecompressTarGz.doExtract; + +import java.net.URI; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -33,31 +27,16 @@ public class ExtractCrossrefRecords { final String outputPath = parser.get("outputPath"); final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz"); - Path hdfsreadpath = new Path(workingPath.concat("/").concat(crossrefFileNameTarGz)); Configuration conf = new Configuration(); conf.set("fs.defaultFS", workingPath); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); FileSystem fs = FileSystem.get(URI.create(workingPath), conf); - FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); - try (TarArchiveInputStream tais = new TarArchiveInputStream( - new GzipCompressorInputStream(crossrefFileStream))) { - TarArchiveEntry entry = null; - while ((entry = tais.getNextTarEntry()) != null) { - if (!entry.isDirectory()) { - try ( - FSDataOutputStream out = fs - .create(new Path(outputPath.concat(entry.getName()).concat(".gz"))); - GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { - 
IOUtils.copy(tais, gzipOs); + doExtract(fs, outputPath, workingPath.concat("/").concat(crossrefFileNameTarGz)); - } - - } - } - } Log.info("Crossref dump reading completed"); } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index dff761c34e..bbadde524d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -23,7 +23,7 @@ public class DownloadCSV { private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class); - public static final char DEFAULT_DELIMITER = ';'; + public static final char DEFAULT_DELIMITER = ','; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -40,9 +40,6 @@ public class DownloadCSV { final String fileURL = parser.get("fileURL"); log.info("fileURL {}", fileURL); - final String workingPath = parser.get("workingPath"); - log.info("workingPath {}", workingPath); - final String outputFile = parser.get("outputFile"); log.info("outputFile {}", outputFile); @@ -63,31 +60,15 @@ public class DownloadCSV { FileSystem fileSystem = FileSystem.get(conf); - new DownloadCSV().doDownload(fileURL, workingPath, outputFile, classForName, delimiter, fileSystem); + new DownloadCSV().doDownload(fileURL, outputFile, classForName, delimiter, fileSystem); } - protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, + protected void doDownload(String fileURL, String outputFile, String classForName, char delimiter, FileSystem fs) throws IOException, ClassNotFoundException, CollectorException { - final HttpConnector2 connector2 = new HttpConnector2(); - - final Path path = new Path(workingPath + 
"/replaced.csv"); - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - - try (PrintWriter writer = new PrintWriter( - new OutputStreamWriter(fs.create(path, true), StandardCharsets.UTF_8))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - try (InputStreamReader reader = new InputStreamReader(fs.open(path))) { + try (InputStreamReader reader = new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) { GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java deleted file mode 100644 index d82d008629..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java +++ /dev/null @@ -1,84 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; -import java.util.Objects; -import java.util.Optional; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.GetCSV; -import eu.dnetlib.dhp.common.collection.HttpConnector2; - -public class DownloadCSV2 { - - private static final Logger log = LoggerFactory.getLogger(DownloadCSV2.class); - - public static final char DEFAULT_DELIMITER = ';'; - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - DownloadCSV2.class - .getResourceAsStream( - 
"/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - log.info("fileURL {}", fileURL); - - final String tmpFile = parser.get("tmpFile"); - log.info("tmpFile {}", tmpFile); - - final String outputFile = parser.get("outputFile"); - log.info("outputFile {}", outputFile); - - final String hdfsNameNode = parser.get("hdfsNameNode"); - log.info("hdfsNameNode {}", hdfsNameNode); - - final String classForName = parser.get("classForName"); - log.info("classForName {}", classForName); - - final char delimiter = Optional - .ofNullable(parser.get("delimiter")) - .map(s -> s.charAt(0)) - .orElse(DEFAULT_DELIMITER); - log.info("delimiter {}", delimiter); - - HttpConnector2 connector2 = new HttpConnector2(); - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(tmpFile)))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - try (BufferedReader in = new BufferedReader(new FileReader(tmpFile))) { - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - GetCSV.getCsv(fileSystem, in, outputFile, classForName, delimiter); - } finally { - FileUtils.deleteQuietly(new File(tmpFile)); - } - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java new file mode 100644 index 0000000000..b3a79e1478 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/ExtractAndMapDoajJson.java @@ -0,0 +1,116 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import static 
eu.dnetlib.dhp.common.collection.DecompressTarGz.doExtract;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.io.compress.CompressionInputStream;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel;
+import eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj.DOAJEntry;
+
+public class ExtractAndMapDoajJson {
+
+    private static final Logger log = LoggerFactory.getLogger(ExtractAndMapDoajJson.class);
+    // shared by every read and write: creating an ObjectMapper per record is needless work
+    private static final ObjectMapper MAPPER = new ObjectMapper();
+
+    public static void main(final String[] args) throws Exception {
+        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+            IOUtils
+                .toString(
+                    Objects
+                        .requireNonNull(
+                            ExtractAndMapDoajJson.class
+                                .getResourceAsStream(
+                                    "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_json_parameters.json"))));
+
+        parser.parseArgument(args);
+
+        final String compressedInput = parser.get("compressedFile");
+        log.info("compressedInput {}", compressedInput);
+
+        final String hdfsNameNode = parser.get("hdfsNameNode");
+        log.info("hdfsNameNode {}", hdfsNameNode);
+
+        final String outputPath = parser.get("outputPath");
+        log.info("outputPath {}", outputPath);
+
+        final String workingPath = parser.get("workingPath");
+        log.info("workingPath {}", workingPath);
+
+        Configuration conf = new Configuration();
+        conf.set("fs.defaultFS", hdfsNameNode);
+
+        FileSystem fs = FileSystem.get(conf);
+        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
+        CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec");
+        doExtract(fs, workingPath, compressedInput);
+        doMap(fs, workingPath, outputPath, codec);
+
+    }
+
+    /**
+     * Reads every file under {@code workingPath}, decompressed with {@code codec}, as a JSON array of DOAJ
+     * entries and writes one JSON-serialised {@link DOAJModel} per line, as UTF-8, to {@code outputPath}
+     * (deleted first when it exists). An entry that cannot be serialised is logged and skipped.
+     */
+    private static void doMap(FileSystem fs, String workingPath, String outputPath, CompressionCodec codec)
+        throws IOException {
+        RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs.listFiles(new Path(workingPath), true);
+        Path hdfsWritePath = new Path(outputPath);
+        if (fs.exists(hdfsWritePath)) {
+            fs.delete(hdfsWritePath, true);
+        }
+        try (FSDataOutputStream out = fs.create(hdfsWritePath);
+            PrintWriter writer = new PrintWriter(
+                new OutputStreamWriter(new BufferedOutputStream(out), StandardCharsets.UTF_8))) {
+            while (fileStatusListIterator.hasNext()) {
+                Path path = fileStatusListIterator.next().getPath();
+                if (!fs.isDirectory(path)) {
+                    try (FSDataInputStream is = fs.open(path);
+                        CompressionInputStream compressionInputStream = codec.createInputStream(is)) {
+                        DOAJEntry[] doajEntries = MAPPER.readValue(compressionInputStream, DOAJEntry[].class);
+                        Arrays.stream(doajEntries).forEach(doaj -> {
+                            try {
+                                writer.println(MAPPER.writeValueAsString(getDoajModel(doaj)));
+                            } catch (JsonProcessingException e) {
+                                log.error("skipping DOAJ entry {} that cannot be serialised", doaj.getId(), e);
+                            }
+                        });
+                    }
+                }
+            }
+        }
+    }
+
+    /** Copies OA start, ISSNs, title and review process from {@code doaj}; its bibjson and editorial must be set. */
+    @NotNull
+    public static DOAJModel getDoajModel(DOAJEntry doaj) {
+        DOAJModel doajModel = new DOAJModel();
+        doajModel.setOaStart(doaj.getBibjson().getOa_start());
+        doajModel.setEissn(doaj.getBibjson().getEissn());
+        doajModel.setIssn(doaj.getBibjson().getPissn());
+        doajModel.setJournalTitle(doaj.getBibjson().getTitle());
+        doajModel.setReviewProcess(doaj.getBibjson().getEditorial().getReview_process());
+        return doajModel;
+    }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java
index 
4b5dc22a61..c3b6f1f30f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap.model; import java.io.Serializable; +import java.util.List; import com.opencsv.bean.CsvBindByName; @@ -17,7 +18,17 @@ public class DOAJModel implements Serializable { private String eissn; @CsvBindByName(column = "Review process") - private String reviewProcess; + private List reviewProcess; + + private Integer oaStart; + + public Integer getOaStart() { + return oaStart; + } + + public void setOaStart(Integer oaStart) { + this.oaStart = oaStart; + } public String getJournalTitle() { return journalTitle; @@ -43,11 +54,11 @@ public class DOAJModel implements Serializable { this.eissn = eissn; } - public String getReviewProcess() { + public List getReviewProcess() { return reviewProcess; } - public void setReviewProcess(String reviewProcess) { + public void setReviewProcess(List reviewProcess) { this.reviewProcess = reviewProcess; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java new file mode 100644 index 0000000000..3473e0f9da --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/APC.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class APC implements Serializable { + private Boolean has_apc; + private String url; + private List max; + + public List getMax() { + return max; + } + + public void setMax(List max) { + this.max = max; + } + + public Boolean getHas_apc() { + return has_apc; + } + + public void setHas_apc(Boolean has_apc) { + 
this.has_apc = has_apc; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java new file mode 100644 index 0000000000..b823e34503 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Admin.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Admin implements Serializable { + private Boolean ticked; + private Boolean seal; + + public Boolean getTicked() { + return ticked; + } + + public void setTicked(Boolean ticked) { + this.ticked = ticked; + } + + public Boolean getSeal() { + return seal; + } + + public void setSeal(Boolean seal) { + this.seal = seal; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java new file mode 100644 index 0000000000..7e90e24a2f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Article.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +public class Article implements Serializable { + private String license_display_example_url; + private List license_display; + private Boolean orcid; + private Boolean i4oc_open_citations; + + public String getLicense_display_example_url() { + return license_display_example_url; + } + + public void setLicense_display_example_url(String license_display_example_url) { + this.license_display_example_url = license_display_example_url; + } + + public List getLicense_display() { + 
return license_display; + } + + public void setLicense_display(List license_display) { + this.license_display = license_display; + } + + public Boolean getOrcid() { + return orcid; + } + + public void setOrcid(Boolean orcid) { + this.orcid = orcid; + } + + public Boolean getI4oc_open_citations() { + return i4oc_open_citations; + } + + public void setI4oc_open_citations(Boolean i4oc_open_citations) { + this.i4oc_open_citations = i4oc_open_citations; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java new file mode 100644 index 0000000000..f7c0d501f4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/BibJson.java @@ -0,0 +1,253 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +public class BibJson implements Serializable { + private Editorial editorial; + private PidScheme pid_scheme; + private Copyright copyright; + private List keywords; + private Plagiarism plagiarism; + private List subject; + private String eissn; + private String pissn; + private List language; + private String title; + private Article article; + private Institution institution; + private Preservation preservation; + private List license; + private Ref ref; + private Integer oa_start; + private APC apc; + private OtherCharges other_charges; + private Integer publication_time_weeks; + private DepositPolicy deposit_policy; + private Publisher publisher; + private Boolean boai; + private Waiver waiver; + private String alternative_title; + private List is_replaced_by; + private List replaces; + private String discontinued_date; + + public String getDiscontinued_date() { + return discontinued_date; + } + + public void 
setDiscontinued_date(String discontinued_date) { + this.discontinued_date = discontinued_date; + } + + public List getReplaces() { + return replaces; + } + + public void setReplaces(List replaces) { + this.replaces = replaces; + } + + public List getIs_replaced_by() { + return is_replaced_by; + } + + public void setIs_replaced_by(List is_replaced_by) { + this.is_replaced_by = is_replaced_by; + } + + public String getAlternative_title() { + return alternative_title; + } + + public void setAlternative_title(String alternative_title) { + this.alternative_title = alternative_title; + } + + public String getPissn() { + return pissn; + } + + public void setPissn(String pissn) { + this.pissn = pissn; + } + + public Editorial getEditorial() { + return editorial; + } + + public void setEditorial(Editorial editorial) { + this.editorial = editorial; + } + + public PidScheme getPid_scheme() { + return pid_scheme; + } + + public void setPid_scheme(PidScheme pid_scheme) { + this.pid_scheme = pid_scheme; + } + + public Copyright getCopyright() { + return copyright; + } + + public void setCopyright(Copyright copyright) { + this.copyright = copyright; + } + + public List getKeywords() { + return keywords; + } + + public void setKeywords(List keywords) { + this.keywords = keywords; + } + + public Plagiarism getPlagiarism() { + return plagiarism; + } + + public void setPlagiarism(Plagiarism plagiarism) { + this.plagiarism = plagiarism; + } + + public List getSubject() { + return subject; + } + + public void setSubject(List subject) { + this.subject = subject; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public List getLanguage() { + return language; + } + + public void setLanguage(List language) { + this.language = language; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public Article getArticle() { + return article; + } + + 
public void setArticle(Article article) { + this.article = article; + } + + public Institution getInstitution() { + return institution; + } + + public void setInstitution(Institution institution) { + this.institution = institution; + } + + public Preservation getPreservation() { + return preservation; + } + + public void setPreservation(Preservation preservation) { + this.preservation = preservation; + } + + public List getLicense() { + return license; + } + + public void setLicense(List license) { + this.license = license; + } + + public Ref getRef() { + return ref; + } + + public void setRef(Ref ref) { + this.ref = ref; + } + + public Integer getOa_start() { + return oa_start; + } + + public void setOa_start(Integer oa_start) { + this.oa_start = oa_start; + } + + public APC getApc() { + return apc; + } + + public void setApc(APC apc) { + this.apc = apc; + } + + public OtherCharges getOther_charges() { + return other_charges; + } + + public void setOther_charges(OtherCharges other_charges) { + this.other_charges = other_charges; + } + + public Integer getPublication_time_weeks() { + return publication_time_weeks; + } + + public void setPublication_time_weeks(Integer publication_time_weeks) { + this.publication_time_weeks = publication_time_weeks; + } + + public DepositPolicy getDeposit_policy() { + return deposit_policy; + } + + public void setDeposit_policy(DepositPolicy deposit_policy) { + this.deposit_policy = deposit_policy; + } + + public Publisher getPublisher() { + return publisher; + } + + public void setPublisher(Publisher publisher) { + this.publisher = publisher; + } + + public Boolean getBoai() { + return boai; + } + + public void setBoai(Boolean boai) { + this.boai = boai; + } + + public Waiver getWaiver() { + return waiver; + } + + public void setWaiver(Waiver waiver) { + this.waiver = waiver; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java new file mode 100644 index 0000000000..c595c4c88d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Copyright.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class Copyright implements Serializable { + private Boolean author_retains; + private String url; + + public Boolean getAuthor_retains() { + return author_retains; + } + + public void setAuthor_retains(Boolean author_retains) { + this.author_retains = author_retains; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java new file mode 100644 index 0000000000..add5bf8bf7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DOAJEntry.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +public class DOAJEntry implements Serializable { + private String last_updated; + private BibJson bibjson; + private Admin admin; + private String created_date; + private String id; + + public String getLast_updated() { + return last_updated; + } + + public void setLast_updated(String last_updated) { + this.last_updated = last_updated; + } + + public BibJson getBibjson() { + return bibjson; + } + + public void setBibjson(BibJson bibjson) { + this.bibjson = bibjson; + } + + public Admin getAdmin() { + return admin; + } + + public void setAdmin(Admin admin) { + this.admin = admin; + } + + public String getCreated_date() { + return created_date; + } + + public void setCreated_date(String 
created_date) { + this.created_date = created_date; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java new file mode 100644 index 0000000000..d86c97f34b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/DepositPolicy.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +/** Serializable bean holding a service list, a url string and a has_policy flag, each exposed through a plain getter/setter pair. NOTE(review): service is declared as a raw List here; its element type is not visible in this patch - confirm it and parameterize. */ public class DepositPolicy implements Serializable { + private List service; + private String url; + private Boolean has_policy; + + public List getService() { + return service; + } + + public void setService(List service) { + this.service = service; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_policy() { + return has_policy; + } + + public void setHas_policy(Boolean has_policy) { + this.has_policy = has_policy; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java new file mode 100644 index 0000000000..35bfba158e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Editorial.java @@ -0,0 +1,35 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +/** Serializable bean holding a review_process list plus review_url and board_url strings, each exposed through a plain getter/setter pair. NOTE(review): review_process is declared as a raw List here; its element type is not visible in this patch - confirm it and parameterize. */ public class Editorial implements Serializable { + private List review_process; + private String review_url; + private String board_url; + + public List getReview_process() { + return review_process; + } + + public void 
setReview_process(List review_process) { + this.review_process = review_process; + } + + public String getReview_url() { + return review_url; + } + + public void setReview_url(String review_url) { + this.review_url = review_url; + } + + public String getBoard_url() { + return board_url; + } + + public void setBoard_url(String board_url) { + this.board_url = board_url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java new file mode 100644 index 0000000000..3b4d904935 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Institution.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +/** Serializable bean holding a country string and a name string, each exposed through a plain getter/setter pair. */ public class Institution implements Serializable { + private String country; + private String name; + + public String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = country; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java new file mode 100644 index 0000000000..64c7bc18fd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/License.java @@ -0,0 +1,67 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonProperty; + +/** Serializable bean holding four Boolean flags (nc, nd, by, sa) plus type and url strings. The setters of the four flags carry JsonProperty names NC, ND, BY and SA; type and url have no annotation. NOTE(review): the nc accessors are spelled getnC/setnC while the other flags use getNd/getBy/getSa - confirm this spelling is intended before relying on bean-name conventions. */ public class License implements Serializable { + private Boolean nc; + private Boolean nd; + private Boolean by; + private String type; + private Boolean sa; + private String url; + + 
public Boolean getnC() { + return nc; + } + + @JsonProperty("NC") + public void setnC(Boolean NC) { + this.nc = NC; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public Boolean getNd() { + return nd; + } + + @JsonProperty("ND") + public void setNd(Boolean nd) { + this.nd = nd; + } + + public Boolean getBy() { + return by; + } + + @JsonProperty("BY") + public void setBy(Boolean by) { + this.by = by; + } + + public Boolean getSa() { + return sa; + } + + @JsonProperty("SA") + public void setSa(Boolean sa) { + this.sa = sa; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java new file mode 100644 index 0000000000..0e292b631a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Max.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +/** Serializable bean pairing an Integer price with a currency string, each exposed through a plain getter/setter pair. */ public class Max implements Serializable { + private Integer price; + private String currency; + + public Integer getPrice() { + return price; + } + + public void setPrice(Integer price) { + this.price = price; + } + + public String getCurrency() { + return currency; + } + + public void setCurrency(String currency) { + this.currency = currency; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java new file mode 100644 index 0000000000..1583481d28 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/OtherCharges.java @@ -0,0 +1,25 @@ + 
+package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +/** Serializable bean holding a has_other_charges flag and a url string, each exposed through a plain getter/setter pair. */ public class OtherCharges implements Serializable { + private Boolean has_other_charges; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_other_charges() { + return has_other_charges; + } + + public void setHas_other_charges(Boolean has_other_charges) { + this.has_other_charges = has_other_charges; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java new file mode 100644 index 0000000000..bd7e710ddb --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/PidScheme.java @@ -0,0 +1,26 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +/** Serializable bean holding a scheme list and a has_pid_scheme flag, each exposed through a plain getter/setter pair. NOTE(review): scheme is declared as a raw List here; its element type is not visible in this patch - confirm it and parameterize. */ public class PidScheme implements Serializable { + private List scheme; + private Boolean has_pid_scheme; + + public List getScheme() { + return scheme; + } + + public void setScheme(List scheme) { + this.scheme = scheme; + } + + public Boolean getHas_pid_scheme() { + return has_pid_scheme; + } + + public void setHas_pid_scheme(Boolean has_pid_scheme) { + this.has_pid_scheme = has_pid_scheme; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java new file mode 100644 index 0000000000..f2230b3ea8 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Plagiarism.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + 
+/** Serializable bean holding a detection flag and a url string, each exposed through a plain getter/setter pair. */ public class Plagiarism implements Serializable { + private Boolean detection; + private String url; + + public Boolean getDetection() { + return detection; + } + + public void setDetection(Boolean detection) { + this.detection = detection; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java new file mode 100644 index 0000000000..f56ea9953d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Preservation.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; +import java.util.List; + +/** Serializable bean holding a has_preservation flag, service and national_library lists, and a url string, each exposed through a plain getter/setter pair. NOTE(review): service and national_library are declared as raw List here; their element types are not visible in this patch - confirm them and parameterize. */ public class Preservation implements Serializable { + private Boolean has_preservation; + private List service; + private List national_library; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_preservation() { + return has_preservation; + } + + public void setHas_preservation(Boolean has_preservation) { + this.has_preservation = has_preservation; + } + + public List getService() { + return service; + } + + public void setService(List service) { + this.service = service; + } + + public List getNational_library() { + return national_library; + } + + public void setNational_library(List national_library) { + this.national_library = national_library; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java new file mode 100644 index 0000000000..6d97a79694 --- 
/dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Publisher.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +/** Serializable bean holding a country string and a name string, each exposed through a plain getter/setter pair. */ public class Publisher implements Serializable { + private String country; + private String name; + + public String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = country; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java new file mode 100644 index 0000000000..0f7c7dc953 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Ref.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +/** Serializable bean holding five strings - aims_scope, journal, oa_statement, author_instructions and license_terms - each exposed through a plain getter/setter pair. */ public class Ref implements Serializable { + private String aims_scope; + private String journal; + private String oa_statement; + private String author_instructions; + private String license_terms; + + public String getAims_scope() { + return aims_scope; + } + + public void setAims_scope(String aims_scope) { + this.aims_scope = aims_scope; + } + + public String getJournal() { + return journal; + } + + public void setJournal(String journal) { + this.journal = journal; + } + + public String getOa_statement() { + return oa_statement; + } + + public void setOa_statement(String oa_statement) { + this.oa_statement = oa_statement; + } + + public String getAuthor_instructions() { + return author_instructions; + } + + public void setAuthor_instructions(String author_instructions) { + this.author_instructions = author_instructions; + } + + public String getLicense_terms() { + return license_terms; + } + + public void 
setLicense_terms(String license_terms) { + this.license_terms = license_terms; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java new file mode 100644 index 0000000000..811638e762 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Subject.java @@ -0,0 +1,34 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +/** Serializable bean holding code, scheme and term strings, each exposed through a plain getter/setter pair. */ public class Subject implements Serializable { + private String code; + private String scheme; + private String term; + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getScheme() { + return scheme; + } + + public void setScheme(String scheme) { + this.scheme = scheme; + } + + public String getTerm() { + return term; + } + + public void setTerm(String term) { + this.term = term; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java new file mode 100644 index 0000000000..ca67dde0ca --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/doaj/Waiver.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj; + +import java.io.Serializable; + +/** Serializable bean holding a has_waiver flag and a url string, each exposed through a plain getter/setter pair. */ public class Waiver implements Serializable { + private Boolean has_waiver; + private String url; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public Boolean getHas_waiver() { + return has_waiver; + } + + public void setHas_waiver(Boolean has_waiver) { + this.has_waiver = has_waiver; + } +} diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 84035fe4ef..1a3261ffbc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -74,7 +74,9 @@ ${wf:conf('resumeFrom') eq 'ProduceHBM'} - ${wf:conf('resumeFrom') eq 'download_csv'} + ${wf:conf('resumeFrom') eq 'DownloadBoth'} + ${wf:conf('resumeFrom') eq 'DownloadGold'} + ${wf:conf('resumeFrom') eq 'DownloadDoaj'} @@ -83,18 +85,9 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - + - + @@ -103,21 +96,43 @@ --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --tmpFile/tmp/unibi_gold_replaced.csv - --outputFile${workingDir}/unibi_gold.json + --outputFile/user/${wf:user()}/data/unibi_gold.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel - + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${doajJsonFileURL} + ${dumpPath} + ${dumpFileName} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 + eu.dnetlib.dhp.oa.graph.hostedbymap.ExtractAndMapDoajJson --hdfsNameNode${nameNode} - --fileURL${doajFileURL} - --tmpFile/tmp/doaj_replaced.csv - --outputFile${workingDir}/doaj.json - --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel + --compressedFile${dumpPath}/${dumpFileName} + --workingPath${workingDir}/DOAJ/ + --outputPath/user/${wf:user()}/data/doaj.json @@ -125,6 +140,54 @@ + + + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + --hdfsNameNode${nameNode} + --fileURL${unibiFileURL} + --tmpFile/tmp/unibi_gold_replaced.csv + --outputFile/user/${wf:user()}/data/unibi_gold.json + 
--classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${doajJsonFileURL} + ${dumpPath} + ${dumpFileName} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.ExtractAndMapDoajJson + --hdfsNameNode${nameNode} + --compressedFile${dumpPath}/${dumpFileName} + --workingPath${workingDir}/DOAJ/ + --outputPath/user/${wf:user()}/data/doaj.json + + + + + yarn-cluster diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index 8d8965866f..0188dac282 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -2,9 +2,10 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} import eu.dnetlib.dhp.schema.oaf.Datasource -import org.apache.commons.io.IOUtils +import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.io.compress.GzipCodec @@ -13,7 +14,8 @@ import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} -import java.io.PrintWriter +import java.io.{File, PrintWriter} +import scala.collection.JavaConverters._ object SparkProduceHostedByMap { @@ -256,6 +258,8 @@ object SparkProduceHostedByMap { logger.info("Getting the Datasources") 
+ HdfsSupport.remove(outputPath, spark.sparkContext.hadoopConfiguration) + Aggregators .explodeHostedByItemType( oaHostedByDataset(spark, datasourcePath) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index edf74fc6a6..48f1e0c064 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -55,7 +55,6 @@ public class DownloadCsvTest { new DownloadCSV() .doDownload( fileURL, - workingDir + "/unibi_gold", outputFile, UnibiGoldModel.class.getName(), ',', @@ -91,56 +90,6 @@ public class DownloadCsvTest { assertEquals(67028, count); } - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://doaj.org/csv"; - - final String outputFile = workingDir + "/doaj.json"; - new DownloadCSV() - .doDownload( - fileURL, - workingDir + "/doaj", - outputFile, - DOAJModel.class.getName(), - ',', - fs); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - assertEquals("0001-3765", doaj.getIssn()); - assertEquals("1678-2690", doaj.getEissn()); - assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); - } - if (count == 22) { - log.info(new ObjectMapper().writeValueAsString(doaj)); - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - } - if (count == 7904) { - // log.info(new ObjectMapper().writeValueAsString(doaj)); - assertEquals("", doaj.getIssn()); - assertEquals("2055-7159", doaj.getEissn()); - assertEquals("BJR|case 
reports", doaj.getJournalTitle()); - } - if (count == 16707) { - - assertEquals("2783-1043", doaj.getIssn()); - assertEquals("2783-1051", doaj.getEissn()); - assertEquals("فیزیک کاربردی ایران", doaj.getJournalTitle()); - } - - count += 1; - } - - assertEquals(16715, count); - } - @AfterAll public static void cleanup() { FileUtils.deleteQuietly(new File(workingDir)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json index 9cec80eb48..09730f1da7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json @@ -1,25 +1,25 @@ -{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":"Double blind peer review"} -{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":"Blind peer review"} -{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":"Double blind peer review"} -{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","reviewProcess":"Double blind peer review"} -{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","reviewProcess":"Double blind peer review"} -{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":"Double blind peer review"} -{"journalTitle":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","reviewProcess":"Peer review"} -{"journalTitle":"Membranes","issn":"2077-0375","eissn":"","reviewProcess":"Blind peer review"} -{"journalTitle":"Journal of Clinical 
Medicine","issn":"","eissn":"2077-0383","reviewProcess":"Blind peer review"} -{"journalTitle":"Agriculture","issn":"","eissn":"2077-0472","reviewProcess":"Blind peer review"} -{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","reviewProcess":"Double blind peer review"} -{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","reviewProcess":"Blind peer review"} -{"journalTitle":"Religions","issn":"","eissn":"2077-1444","reviewProcess":"Double blind peer review"} -{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":"Double blind peer review"} -{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":"","reviewProcess":"Peer review"} -{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":"Double blind peer review"} -{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":"Double blind peer review"} -{"journalTitle":"Science Education International","issn":"","eissn":"2077-2327","reviewProcess":"Double blind peer review"} -{"journalTitle":"Edumecentro","issn":"","eissn":"2077-2874","reviewProcess":"Double blind peer review"} -{"journalTitle":"Monteverdia","issn":"","eissn":"2077-2890","reviewProcess":"Double blind peer review"} -{"journalTitle":"Transformación","issn":"","eissn":"2077-2955","reviewProcess":"Double blind peer review"} -{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":"Double blind peer review"} -{"journalTitle":"Revue de Primatologie","issn":"","eissn":"2077-3757","reviewProcess":"Peer review"} \ No newline at end of file +{"journalTitle":"Lëd i 
Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":["Double blind peer review"],"oaStart":2015} +{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":["Blind peer review"],"oaStart":2009} +{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":null,"reviewProcess":["Double blind peer review"],"oaStart":2006} +{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":null,"eissn":"2076-8427","reviewProcess":["Double blind peer review"],"oaStart":2009} +{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":["Double blind peer review"],"oaStart":2008} +{"journalTitle":"Rambam Maimonides Medical Journal","issn":null,"eissn":"2076-9172","reviewProcess":["Peer review"],"oaStart":2010} +{"journalTitle":"Membranes","issn":"2077-0375","eissn":null,"reviewProcess":["Blind peer review"],"oaStart":2011} +{"journalTitle":"Journal of Clinical Medicine","issn":null,"eissn":"2077-0383","reviewProcess":["Blind peer review"],"oaStart":2012} +{"journalTitle":"Agriculture","issn":null,"eissn":"2077-0472","reviewProcess":["Blind peer review"],"oaStart":2011} +{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":null,"reviewProcess":["Double blind peer review"],"oaStart":2014} +{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":["Double blind peer review"],"oaStart":2019} +{"journalTitle":"Journal of Marine Science and Engineering","issn":null,"eissn":"2077-1312","reviewProcess":["Blind peer review"],"oaStart":2013} +{"journalTitle":"Religions","issn":null,"eissn":"2077-1444","reviewProcess":["Double blind peer review"],"oaStart":2010} 
+{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":null,"reviewProcess":["Peer review"],"oaStart":2009} +{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":["Double blind peer review"],"oaStart":2014} +{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":["Double blind peer review"],"oaStart":2017} +{"journalTitle":"Science Education International","issn":null,"eissn":"2077-2327","reviewProcess":["Double blind peer review"],"oaStart":2017} +{"journalTitle":"Edumecentro","issn":null,"eissn":"2077-2874","reviewProcess":["Double blind peer review"],"oaStart":2013} +{"journalTitle":"Monteverdia","issn":null,"eissn":"2077-2890","reviewProcess":["Double blind peer review"],"oaStart":2008} +{"journalTitle":"Transformación","issn":null,"eissn":"2077-2955","reviewProcess":["Double blind peer review"],"oaStart":2010} +{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":["Double blind peer review"],"oaStart":2011} +{"journalTitle":"Revue de Primatologie","issn":null,"eissn":"2077-3757","reviewProcess":["Peer review"],"oaStart":2009} \ No newline at end of file