98 lines
3.8 KiB
Java
98 lines
3.8 KiB
Java
package eu.dnetlib.bioschemas.api.controller;
|
|
|
|
import eu.dnetlib.bioschemas.api.MainApplication;
|
|
import eu.dnetlib.bioschemas.api.scraper.ScrapingExecution;
|
|
import eu.dnetlib.bioschemas.api.scraper.ScrapingExecutor;
|
|
import eu.dnetlib.bioschemas.api.utils.BioschemasException;
|
|
import eu.dnetlib.common.controller.AbstractDnetController;
|
|
import io.swagger.v3.oas.annotations.Operation;
|
|
import io.swagger.v3.oas.annotations.Parameter;
|
|
import io.swagger.v3.oas.annotations.enums.ParameterIn;
|
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
|
import org.apache.commons.io.FileUtils;
|
|
import org.apache.commons.io.LineIterator;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
import org.springframework.web.bind.annotation.*;
|
|
|
|
import javax.servlet.http.HttpServletRequest;
|
|
import javax.servlet.http.HttpServletResponse;
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
/**
|
|
* @author enrico.ottonello
|
|
*
|
|
*/
|
|
|
|
@RestController
|
|
@RequestMapping("/api")
|
|
@Tag(name = MainApplication.BIOSCHEMAS_APIS)
|
|
public class BioschemasAPIController extends AbstractDnetController {
|
|
|
|
@Value("${outputFolder}")
|
|
private String outputFolder;
|
|
@Value("${outputDataPattern}")
|
|
private String outputDataPattern;
|
|
|
|
private static Logger logger = LoggerFactory.getLogger(BioschemasAPIController.class);
|
|
|
|
@Autowired
|
|
private ScrapingExecutor scrapingExecutor;
|
|
|
|
private static final Log log = LogFactory.getLog(BioschemasAPIController.class);
|
|
|
|
@Operation(summary = "start the scraping operation", description = "<H1>Working input values are in the following table</H1><BR><TABLE><TR><TH>datasourceKey</TH><TH>sitemapUrl</TH></TR><TR><TD>ped</TD><TD>https://proteinensemble.org/sitemap2.xml.gz</TD></TR><TR><TD>disprot</TD><TD>https://disprot.org/sitemap2.xml.gz</TD></TR><TR><TD>mobidb</TD><TD>https://mobidb.org/sitemap2.xml.gz</TD></TR></TABLE>")
|
|
@GetMapping("/startScraping")
|
|
public ScrapingExecution startScraping(@Parameter(name = "datasourceKey") @RequestParam final String datasourceKey,
|
|
@Parameter(name = "sitemapUrl") @RequestParam final String sitemapUrl,
|
|
final HttpServletRequest req) {
|
|
logger.info("<STARTSCRAPING> datasourceKey: "+datasourceKey+" sitemapUrl:"+sitemapUrl);
|
|
return scrapingExecutor.startScraping(datasourceKey, sitemapUrl, getOutputDataPattern(), req.getRemoteAddr(), getOutputFolder());
|
|
}
|
|
|
|
@Operation(summary = "check the status of last scraping operation")
|
|
@GetMapping("/startScraping/status")
|
|
public final ScrapingExecution statusScraping() {
|
|
return scrapingExecutor.getLastScrapingExecution();
|
|
}
|
|
|
|
@Operation(summary = "retrieve the nquads downloaded for one specific provider")
|
|
@RequestMapping(value = "/getNQuads", method = RequestMethod.GET)
|
|
public String getNQuads(@Parameter(name = "datasourceKey") @RequestParam final String datasourceKey, HttpServletResponse response) throws BioschemasException, IOException {
|
|
|
|
logger.info("<GETNQUADS> datasourceKey: "+datasourceKey);
|
|
|
|
LineIterator it = FileUtils.lineIterator(new File(getOutputFolder().concat("/").concat(datasourceKey).concat(getOutputDataPattern())), "UTF-8");
|
|
try {
|
|
while (it.hasNext()) {
|
|
String line = it.nextLine();
|
|
response.getOutputStream().write(line.getBytes(StandardCharsets.UTF_8));
|
|
response.getOutputStream().println();
|
|
}
|
|
} finally {
|
|
}
|
|
return "";
|
|
}
|
|
|
|
public String getOutputFolder() {
|
|
return outputFolder;
|
|
}
|
|
|
|
public String getOutputDataPattern() {
|
|
return outputDataPattern;
|
|
}
|
|
|
|
public void setOutputFolder(String outputFolder) {
|
|
this.outputFolder = outputFolder;
|
|
}
|
|
|
|
public void setOutputDataPattern(String outputDataPattern) {
|
|
this.outputDataPattern = outputDataPattern;
|
|
}
|
|
} |