2022-06-07 13:38:14 +02:00
|
|
|
package eu.dnetlib.bioschemas.api.publisher;
|
2022-06-06 09:37:29 +02:00
|
|
|
|
2022-06-07 13:38:14 +02:00
|
|
|
import eu.dnetlib.bioschemas.api.ServiceScrapeDriver;
|
2022-06-07 15:12:46 +02:00
|
|
|
import eu.dnetlib.bioschemas.api.scraper.ScrapingExecution;
|
|
|
|
import eu.dnetlib.bioschemas.api.scraper.ScrapingExecutor;
|
2022-06-06 09:37:29 +02:00
|
|
|
import eu.dnetlib.common.controller.AbstractDnetController;
|
|
|
|
import org.apache.commons.io.FileUtils;
|
|
|
|
import org.apache.commons.io.LineIterator;
|
2022-06-07 15:12:46 +02:00
|
|
|
import org.apache.commons.logging.Log;
|
|
|
|
import org.apache.commons.logging.LogFactory;
|
2022-06-07 13:02:36 +02:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
2022-06-07 15:12:46 +02:00
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
2022-06-07 13:02:36 +02:00
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
2022-06-07 15:12:46 +02:00
|
|
|
import org.springframework.web.bind.annotation.*;
|
2022-06-06 09:37:29 +02:00
|
|
|
|
2022-06-07 15:12:46 +02:00
|
|
|
import javax.servlet.http.HttpServletRequest;
|
2022-06-06 09:37:29 +02:00
|
|
|
import javax.servlet.http.HttpServletResponse;
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @author enrico.ottonello
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
@RestController
|
|
|
|
@RequestMapping("/api")
|
2022-06-07 13:38:14 +02:00
|
|
|
public class BioschemasAPIController extends AbstractDnetController {
|
2022-06-06 09:37:29 +02:00
|
|
|
|
2022-06-07 13:02:36 +02:00
|
|
|
@Value("${outputFolder}")
|
|
|
|
private String outputFolder;
|
|
|
|
@Value("${outputDataPattern}")
|
|
|
|
private String outputDataPattern;
|
2022-06-06 09:37:29 +02:00
|
|
|
|
2022-06-07 13:38:14 +02:00
|
|
|
private static Logger logger = LoggerFactory.getLogger(BioschemasAPIController.class);
|
2022-06-07 13:02:36 +02:00
|
|
|
|
2022-06-07 15:12:46 +02:00
|
|
|
@Autowired
|
|
|
|
private ScrapingExecutor scrapingExecutor;
|
2022-06-06 09:37:29 +02:00
|
|
|
|
2022-06-07 15:12:46 +02:00
|
|
|
private static final Log log = LogFactory.getLog(BioschemasAPIController.class);
|
|
|
|
|
|
|
|
@GetMapping("/startScraping")
|
|
|
|
public ScrapingExecution startScraping(@RequestParam final String datasourceKey, @RequestParam final String sitemapUrl, final HttpServletRequest req) {
|
2022-06-07 13:02:36 +02:00
|
|
|
logger.info("<STARTSCRAPING> datasourceKey: "+datasourceKey+" sitemapUrl:"+sitemapUrl);
|
2022-06-07 15:12:46 +02:00
|
|
|
return scrapingExecutor.startScraping(datasourceKey, sitemapUrl, getOutputDataPattern(), req.getRemoteAddr());
|
|
|
|
}
|
2022-06-06 09:37:29 +02:00
|
|
|
|
2022-06-07 15:12:46 +02:00
|
|
|
@GetMapping("/startScraping/status")
|
|
|
|
public final ScrapingExecution statusScraping() {
|
|
|
|
return scrapingExecutor.getLastScrapingExecution();
|
2022-06-06 09:37:29 +02:00
|
|
|
}
|
|
|
|
|
2022-06-07 13:02:36 +02:00
|
|
|
@RequestMapping(value = "/getNQuads", method = RequestMethod.GET)
|
2022-06-07 13:38:14 +02:00
|
|
|
public String getNQuads(@RequestParam final String datasourceKey, HttpServletResponse response) throws BioschemasException, IOException {
|
2022-06-07 13:02:36 +02:00
|
|
|
|
|
|
|
logger.info("<GETNQUADS> datasourceKey: "+datasourceKey);
|
|
|
|
|
|
|
|
LineIterator it = FileUtils.lineIterator(new File(getOutputFolder().concat("/").concat(datasourceKey).concat(getOutputDataPattern())), "UTF-8");
|
2022-06-06 09:37:29 +02:00
|
|
|
try {
|
|
|
|
while (it.hasNext()) {
|
|
|
|
String line = it.nextLine();
|
|
|
|
response.getOutputStream().write(line.getBytes(StandardCharsets.UTF_8));
|
|
|
|
response.getOutputStream().println();
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
}
|
|
|
|
return "";
|
|
|
|
}
|
2022-06-07 13:02:36 +02:00
|
|
|
|
|
|
|
public String getOutputFolder() {
|
|
|
|
return outputFolder;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getOutputDataPattern() {
|
|
|
|
return outputDataPattern;
|
|
|
|
}
|
2022-06-06 09:37:29 +02:00
|
|
|
}
|