package eu.dnetlib.bioschemas.api.controller; import eu.dnetlib.bioschemas.api.MainApplication; import eu.dnetlib.bioschemas.api.scraper.ScrapingExecution; import eu.dnetlib.bioschemas.api.scraper.ScrapingExecutor; import eu.dnetlib.bioschemas.api.utils.BioschemasException; import eu.dnetlib.common.controller.AbstractDnetController; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.enums.ParameterIn; import io.swagger.v3.oas.annotations.tags.Tag; import org.apache.commons.io.FileUtils; import org.apache.commons.io.LineIterator; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.web.bind.annotation.*; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; /** * @author enrico.ottonello * */ @RestController @RequestMapping("/api") @Tag(name = MainApplication.BIOSCHEMAS_APIS) public class BioschemasAPIController extends AbstractDnetController { @Value("${outputFolder}") private String outputFolder; @Value("${outputDataPattern}") private String outputDataPattern; private static Logger logger = LoggerFactory.getLogger(BioschemasAPIController.class); @Autowired private ScrapingExecutor scrapingExecutor; private static final Log log = LogFactory.getLog(BioschemasAPIController.class); @Operation(summary = "start the scraping operation", description = "

Working input values are in the following table


datasourceKeysitemapUrl
pedhttps://proteinensemble.org/sitemap2.xml.gz
disprothttps://disprot.org/sitemap2.xml.gz
mobidbhttps://mobidb.org/sitemap2.xml.gz
") @GetMapping("/startScraping") public ScrapingExecution startScraping(@Parameter(name = "datasourceKey") @RequestParam final String datasourceKey, @Parameter(name = "sitemapUrl") @RequestParam final String sitemapUrl, final HttpServletRequest req) { logger.info(" datasourceKey: "+datasourceKey+" sitemapUrl:"+sitemapUrl); return scrapingExecutor.startScraping(datasourceKey, sitemapUrl, getOutputDataPattern(), req.getRemoteAddr(), getOutputFolder()); } @Operation(summary = "check the status of last scraping operation") @GetMapping("/startScraping/status") public final ScrapingExecution statusScraping() { return scrapingExecutor.getLastScrapingExecution(); } @Operation(summary = "retrieve the nquads downloaded for one specific provider") @RequestMapping(value = "/getNQuads", method = RequestMethod.GET) public String getNQuads(@Parameter(name = "datasourceKey") @RequestParam final String datasourceKey, HttpServletResponse response) throws BioschemasException, IOException { logger.info(" datasourceKey: "+datasourceKey); LineIterator it = FileUtils.lineIterator(new File(getOutputFolder().concat("/").concat(datasourceKey).concat(getOutputDataPattern())), "UTF-8"); try { while (it.hasNext()) { String line = it.nextLine(); response.getOutputStream().write(line.getBytes(StandardCharsets.UTF_8)); response.getOutputStream().println(); } } finally { } return ""; } public String getOutputFolder() { return outputFolder; } public String getOutputDataPattern() { return outputDataPattern; } public void setOutputFolder(String outputFolder) { this.outputFolder = outputFolder; } public void setOutputDataPattern(String outputDataPattern) { this.outputDataPattern = outputDataPattern; } }