refactoring; webapp context set to bioschemas
This commit is contained in:
parent
079b2506e6
commit
8a18fe11ec
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"type_source": "SVN",
|
"type_source": "SVN",
|
||||||
"goal": "package -U source:jar",
|
"goal": "package -U source:jar",
|
||||||
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet50/modules/dnet-bmuse-webapp/trunk/",
|
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet50/modules/dnet-bioschemas-api/trunk/",
|
||||||
"deploy_repository": "dnet5-snapshots",
|
"deploy_repository": "dnet5-snapshots",
|
||||||
"version": "5",
|
"version": "5",
|
||||||
"mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it, enrico.ottonello@isti.cnr.it",
|
"mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it, enrico.ottonello@isti.cnr.it",
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package eu.dnetlib.bmuse_webapp;
|
package eu.dnetlib.bioschemas.api;
|
||||||
|
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
import org.springframework.context.annotation.Profile;
|
import org.springframework.context.annotation.Profile;
|
|
@ -1,4 +1,4 @@
|
||||||
package eu.dnetlib.bmuse_webapp;
|
package eu.dnetlib.bioschemas.api;
|
||||||
|
|
||||||
import eu.dnetlib.common.app.AbstractDnetApp;
|
import eu.dnetlib.common.app.AbstractDnetApp;
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
|
@ -1,10 +1,10 @@
|
||||||
package eu.dnetlib.bmuse_webapp;
|
package eu.dnetlib.bioschemas.api;
|
||||||
|
|
||||||
import eu.dnetlib.bmuse_webapp.crawl.CrawlRecord;
|
import eu.dnetlib.bioschemas.api.crawl.CrawlRecord;
|
||||||
import eu.dnetlib.bmuse_webapp.scraper.BMUSEScraper;
|
import eu.dnetlib.bioschemas.api.scraper.BMUSEScraper;
|
||||||
import eu.dnetlib.bmuse_webapp.scraper.ScrapeState;
|
import eu.dnetlib.bioschemas.api.scraper.ScrapeState;
|
||||||
import eu.dnetlib.bmuse_webapp.scraper.ScrapeThread;
|
import eu.dnetlib.bioschemas.api.scraper.ScrapeThread;
|
||||||
import eu.dnetlib.bmuse_webapp.utils.UrlParser;
|
import eu.dnetlib.bioschemas.api.utils.UrlParser;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
|
@ -1,4 +1,4 @@
|
||||||
package eu.dnetlib.bmuse_webapp.crawl;
|
package eu.dnetlib.bioschemas.api.crawl;
|
||||||
|
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package eu.dnetlib.bmuse_webapp.crawl;
|
package eu.dnetlib.bioschemas.api.crawl;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* {@link eu.dnetlib.bmuse_webapp.crawl.StatusOfScrape} describes the possible status levels the scrape for each URL/CrawlRecord.
|
* {@link StatusOfScrape} describes the possible status levels the scrape for each URL/CrawlRecord.
|
||||||
*
|
*
|
||||||
* Each URL/CrawlRecord can have one of the following:
|
* Each URL/CrawlRecord can have one of the following:
|
||||||
* DOES_NOT_EXIST = 404.
|
* DOES_NOT_EXIST = 404.
|
|
@ -1,6 +1,6 @@
|
||||||
package eu.dnetlib.bmuse_webapp.publisher;
|
package eu.dnetlib.bioschemas.api.publisher;
|
||||||
|
|
||||||
import eu.dnetlib.bmuse_webapp.ServiceScrapeDriver;
|
import eu.dnetlib.bioschemas.api.ServiceScrapeDriver;
|
||||||
import eu.dnetlib.common.controller.AbstractDnetController;
|
import eu.dnetlib.common.controller.AbstractDnetController;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.LineIterator;
|
import org.apache.commons.io.LineIterator;
|
||||||
|
@ -24,17 +24,17 @@ import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
@RestController
|
@RestController
|
||||||
@RequestMapping("/api")
|
@RequestMapping("/api")
|
||||||
public class BMUSEWebappController extends AbstractDnetController {
|
public class BioschemasAPIController extends AbstractDnetController {
|
||||||
|
|
||||||
@Value("${outputFolder}")
|
@Value("${outputFolder}")
|
||||||
private String outputFolder;
|
private String outputFolder;
|
||||||
@Value("${outputDataPattern}")
|
@Value("${outputDataPattern}")
|
||||||
private String outputDataPattern;
|
private String outputDataPattern;
|
||||||
|
|
||||||
private static Logger logger = LoggerFactory.getLogger(BMUSEWebappController.class);
|
private static Logger logger = LoggerFactory.getLogger(BioschemasAPIController.class);
|
||||||
|
|
||||||
@RequestMapping(value = "/startScraping", method = RequestMethod.GET)
|
@RequestMapping(value = "/startScraping", method = RequestMethod.GET)
|
||||||
public String startScraping(@RequestParam final String datasourceKey, @RequestParam final String sitemapUrl) throws BMUSEWebappException, IOException {
|
public String startScraping(@RequestParam final String datasourceKey, @RequestParam final String sitemapUrl) throws BioschemasException, IOException {
|
||||||
|
|
||||||
logger.info("<STARTSCRAPING> datasourceKey: "+datasourceKey+" sitemapUrl:"+sitemapUrl);
|
logger.info("<STARTSCRAPING> datasourceKey: "+datasourceKey+" sitemapUrl:"+sitemapUrl);
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ public class BMUSEWebappController extends AbstractDnetController {
|
||||||
}
|
}
|
||||||
|
|
||||||
@RequestMapping(value = "/getNQuads", method = RequestMethod.GET)
|
@RequestMapping(value = "/getNQuads", method = RequestMethod.GET)
|
||||||
public String getNQuads(@RequestParam final String datasourceKey, HttpServletResponse response) throws BMUSEWebappException, IOException {
|
public String getNQuads(@RequestParam final String datasourceKey, HttpServletResponse response) throws BioschemasException, IOException {
|
||||||
|
|
||||||
logger.info("<GETNQUADS> datasourceKey: "+datasourceKey);
|
logger.info("<GETNQUADS> datasourceKey: "+datasourceKey);
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
package eu.dnetlib.bioschemas.api.publisher;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author enrico.ottonello
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class BioschemasException extends Exception{
|
||||||
|
|
||||||
|
public BioschemasException() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public BioschemasException(final String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BioschemasException(final String message, final Throwable cause) {
|
||||||
|
super(message, cause);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BioschemasException(final Throwable cause) {
|
||||||
|
super(cause);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BioschemasException(final String message, final Throwable cause, final boolean enableSuppression, final boolean writableStackTrace) {
|
||||||
|
super(message, cause, enableSuppression, writableStackTrace);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,4 +1,4 @@
|
||||||
package eu.dnetlib.bmuse_webapp.publisher;
|
package eu.dnetlib.bioschemas.api.publisher;
|
||||||
|
|
||||||
import eu.dnetlib.common.controller.AbstractDnetController;
|
import eu.dnetlib.common.controller.AbstractDnetController;
|
||||||
import org.springframework.stereotype.Controller;
|
import org.springframework.stereotype.Controller;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.bmuse_webapp.scraper;
|
package eu.dnetlib.bioschemas.api.scraper;
|
||||||
|
|
||||||
import hwu.elixir.scrape.exceptions.MissingMarkupException;
|
import hwu.elixir.scrape.exceptions.MissingMarkupException;
|
||||||
import hwu.elixir.scrape.scraper.ScraperFilteredCore;
|
import hwu.elixir.scrape.scraper.ScraperFilteredCore;
|
|
@ -1,7 +1,7 @@
|
||||||
package eu.dnetlib.bmuse_webapp.scraper;
|
package eu.dnetlib.bioschemas.api.scraper;
|
||||||
|
|
||||||
import eu.dnetlib.bmuse_webapp.crawl.CrawlRecord;
|
import eu.dnetlib.bioschemas.api.crawl.StatusOfScrape;
|
||||||
import eu.dnetlib.bmuse_webapp.crawl.StatusOfScrape;
|
import eu.dnetlib.bioschemas.api.crawl.CrawlRecord;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
|
@ -1,7 +1,7 @@
|
||||||
package eu.dnetlib.bmuse_webapp.scraper;
|
package eu.dnetlib.bioschemas.api.scraper;
|
||||||
|
|
||||||
import eu.dnetlib.bmuse_webapp.crawl.CrawlRecord;
|
import eu.dnetlib.bioschemas.api.crawl.CrawlRecord;
|
||||||
import eu.dnetlib.bmuse_webapp.utils.CompressorUtil;
|
import eu.dnetlib.bioschemas.api.utils.CompressorUtil;
|
||||||
import hwu.elixir.scrape.exceptions.CannotWriteException;
|
import hwu.elixir.scrape.exceptions.CannotWriteException;
|
||||||
import hwu.elixir.scrape.exceptions.FourZeroFourException;
|
import hwu.elixir.scrape.exceptions.FourZeroFourException;
|
||||||
import hwu.elixir.scrape.exceptions.JsonLDInspectionException;
|
import hwu.elixir.scrape.exceptions.JsonLDInspectionException;
|
||||||
|
@ -63,7 +63,7 @@ public class ScrapeThread extends Thread {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String nquads = process.getNQUADSFromUrl(record.getUrl(), true);
|
String nquads = process.getNQUADSFromUrl(record.getUrl(), true);
|
||||||
logger.info("downloaded "+record.getUrl() + " leftToScrape:" + scrapeState.pagesLeftToScrape());
|
logger.info("downloaded "+record.getUrl() + " leftToScrape:" + scrapeState.getNumberPagesLeftToScrape());
|
||||||
record.setNquads(CompressorUtil.compressValue(nquads));
|
record.setNquads(CompressorUtil.compressValue(nquads));
|
||||||
if (!nquads.isEmpty()) {
|
if (!nquads.isEmpty()) {
|
||||||
scrapeState.addSuccessfulScrapedURL(record);
|
scrapeState.addSuccessfulScrapedURL(record);
|
|
@ -1,6 +1,6 @@
|
||||||
package eu.dnetlib.bmuse_webapp.scraper;
|
package eu.dnetlib.bioschemas.api.scraper;
|
||||||
|
|
||||||
import eu.dnetlib.bmuse_webapp.crawl.StatusOfScrape;
|
import eu.dnetlib.bioschemas.api.crawl.StatusOfScrape;
|
||||||
import hwu.elixir.scrape.exceptions.*;
|
import hwu.elixir.scrape.exceptions.*;
|
||||||
import hwu.elixir.scrape.scraper.ScraperFilteredCore;
|
import hwu.elixir.scrape.scraper.ScraperFilteredCore;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
|
@ -1,4 +1,4 @@
|
||||||
package eu.dnetlib.bmuse_webapp.utils;
|
package eu.dnetlib.bioschemas.api.utils;
|
||||||
|
|
||||||
import org.apache.commons.codec.binary.Base64;
|
import org.apache.commons.codec.binary.Base64;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.bmuse_webapp.utils;
|
package eu.dnetlib.bioschemas.api.utils;
|
||||||
|
|
||||||
import hwu.elixir.utils.Helpers;
|
import hwu.elixir.utils.Helpers;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
|
@ -1,28 +0,0 @@
|
||||||
package eu.dnetlib.bmuse_webapp.publisher;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author enrico.ottonello
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class BMUSEWebappException extends Exception{
|
|
||||||
|
|
||||||
public BMUSEWebappException() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public BMUSEWebappException(final String message) {
|
|
||||||
super(message);
|
|
||||||
}
|
|
||||||
|
|
||||||
public BMUSEWebappException(final String message, final Throwable cause) {
|
|
||||||
super(message, cause);
|
|
||||||
}
|
|
||||||
|
|
||||||
public BMUSEWebappException(final Throwable cause) {
|
|
||||||
super(cause);
|
|
||||||
}
|
|
||||||
|
|
||||||
public BMUSEWebappException(final String message, final Throwable cause, final boolean enableSuppression, final boolean writableStackTrace) {
|
|
||||||
super(message, cause, enableSuppression, writableStackTrace);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,4 +1,4 @@
|
||||||
server.servlet.context-path=/dnet-bmuse-webapp
|
server.servlet.context-path=/bioschemas
|
||||||
server.port=8281
|
server.port=8281
|
||||||
|
|
||||||
spring.profiles.active=garr
|
spring.profiles.active=garr
|
||||||
|
|
Loading…
Reference in New Issue