dnet-applications/apps/bioschemas-api/src/main/java/eu/dnetlib/bioschemas/api/controller/BioschemasAPIController.java

98 lines
3.8 KiB
Java

package eu.dnetlib.bioschemas.api.controller;
import eu.dnetlib.bioschemas.api.MainApplication;
import eu.dnetlib.bioschemas.api.scraper.ScrapingExecution;
import eu.dnetlib.bioschemas.api.scraper.ScrapingExecutor;
import eu.dnetlib.bioschemas.api.utils.BioschemasException;
import eu.dnetlib.common.controller.AbstractDnetController;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.enums.ParameterIn;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
/**
* @author enrico.ottonello
*
*/
@RestController
@RequestMapping("/api")
@Tag(name = MainApplication.BIOSCHEMAS_APIS)
public class BioschemasAPIController extends AbstractDnetController {
@Value("${outputFolder}")
private String outputFolder;
@Value("${outputDataPattern}")
private String outputDataPattern;
private static Logger logger = LoggerFactory.getLogger(BioschemasAPIController.class);
@Autowired
private ScrapingExecutor scrapingExecutor;
private static final Log log = LogFactory.getLog(BioschemasAPIController.class);
@Operation(summary = "start the scraping operation", description = "<H1>Working input values are in the following table</H1><BR><TABLE><TR><TH>datasourceKey</TH><TH>sitemapUrl</TH></TR><TR><TD>ped</TD><TD>https://proteinensemble.org/sitemap2.xml.gz</TD></TR><TR><TD>disprot</TD><TD>https://disprot.org/sitemap2.xml.gz</TD></TR><TR><TD>mobidb</TD><TD>https://mobidb.org/sitemap2.xml.gz</TD></TR></TABLE>")
@GetMapping("/startScraping")
public ScrapingExecution startScraping(@Parameter(name = "datasourceKey") @RequestParam final String datasourceKey,
@Parameter(name = "sitemapUrl") @RequestParam final String sitemapUrl,
final HttpServletRequest req) {
logger.info("<STARTSCRAPING> datasourceKey: "+datasourceKey+" sitemapUrl:"+sitemapUrl);
return scrapingExecutor.startScraping(datasourceKey, sitemapUrl, getOutputDataPattern(), req.getRemoteAddr(), getOutputFolder());
}
@Operation(summary = "check the status of last scraping operation")
@GetMapping("/startScraping/status")
public final ScrapingExecution statusScraping() {
return scrapingExecutor.getLastScrapingExecution();
}
@Operation(summary = "retrieve the nquads downloaded for one specific provider")
@RequestMapping(value = "/getNQuads", method = RequestMethod.GET)
public String getNQuads(@Parameter(name = "datasourceKey") @RequestParam final String datasourceKey, HttpServletResponse response) throws BioschemasException, IOException {
logger.info("<GETNQUADS> datasourceKey: "+datasourceKey);
LineIterator it = FileUtils.lineIterator(new File(getOutputFolder().concat("/").concat(datasourceKey).concat(getOutputDataPattern())), "UTF-8");
try {
while (it.hasNext()) {
String line = it.nextLine();
response.getOutputStream().write(line.getBytes(StandardCharsets.UTF_8));
response.getOutputStream().println();
}
} finally {
}
return "";
}
public String getOutputFolder() {
return outputFolder;
}
public String getOutputDataPattern() {
return outputDataPattern;
}
public void setOutputFolder(String outputFolder) {
this.outputFolder = outputFolder;
}
public void setOutputDataPattern(String outputDataPattern) {
this.outputDataPattern = outputDataPattern;
}
}