2022-06-08 09:44:39 +02:00
package eu.dnetlib.bioschemas.api.controller ;
2022-06-06 09:37:29 +02:00
2022-08-31 14:06:10 +02:00
import eu.dnetlib.bioschemas.api.MainApplication ;
2022-06-07 15:12:46 +02:00
import eu.dnetlib.bioschemas.api.scraper.ScrapingExecution ;
import eu.dnetlib.bioschemas.api.scraper.ScrapingExecutor ;
2022-06-08 09:44:39 +02:00
import eu.dnetlib.bioschemas.api.utils.BioschemasException ;
2022-06-06 09:37:29 +02:00
import eu.dnetlib.common.controller.AbstractDnetController ;
2022-08-31 14:06:10 +02:00
import io.swagger.v3.oas.annotations.Operation ;
import io.swagger.v3.oas.annotations.Parameter ;
import io.swagger.v3.oas.annotations.enums.ParameterIn ;
import io.swagger.v3.oas.annotations.tags.Tag ;
2022-06-06 09:37:29 +02:00
import org.apache.commons.io.FileUtils ;
import org.apache.commons.io.LineIterator ;
2022-06-07 15:12:46 +02:00
import org.apache.commons.logging.Log ;
import org.apache.commons.logging.LogFactory ;
2022-06-07 13:02:36 +02:00
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
2022-06-07 15:12:46 +02:00
import org.springframework.beans.factory.annotation.Autowired ;
2022-06-07 13:02:36 +02:00
import org.springframework.beans.factory.annotation.Value ;
2022-06-07 15:12:46 +02:00
import org.springframework.web.bind.annotation.* ;
2022-06-06 09:37:29 +02:00
2022-06-07 15:12:46 +02:00
import javax.servlet.http.HttpServletRequest ;
2022-06-06 09:37:29 +02:00
import javax.servlet.http.HttpServletResponse ;
import java.io.File ;
import java.io.IOException ;
import java.nio.charset.StandardCharsets ;
/ * *
* @author enrico . ottonello
*
* /
@RestController
@RequestMapping ( " /api " )
2022-08-31 14:06:10 +02:00
@Tag ( name = MainApplication . BIOSCHEMAS_APIS )
2022-06-07 13:38:14 +02:00
public class BioschemasAPIController extends AbstractDnetController {
2022-06-06 09:37:29 +02:00
2022-06-07 13:02:36 +02:00
@Value ( " ${outputFolder} " )
private String outputFolder ;
@Value ( " ${outputDataPattern} " )
private String outputDataPattern ;
2022-06-06 09:37:29 +02:00
2022-06-07 13:38:14 +02:00
private static Logger logger = LoggerFactory . getLogger ( BioschemasAPIController . class ) ;
2022-06-07 13:02:36 +02:00
2022-06-07 15:12:46 +02:00
@Autowired
private ScrapingExecutor scrapingExecutor ;
2022-06-06 09:37:29 +02:00
2022-06-07 15:12:46 +02:00
private static final Log log = LogFactory . getLog ( BioschemasAPIController . class ) ;
2022-08-31 14:06:10 +02:00
@Operation ( summary = " start the scraping operation " , description = " <H1>Working input values are in the following table</H1><BR><TABLE><TR><TH>datasourceKey</TH><TH>sitemapUrl</TH></TR><TR><TD>ped</TD><TD>https://proteinensemble.org/sitemap2.xml.gz</TD></TR><TR><TD>disprot</TD><TD>https://disprot.org/sitemap2.xml.gz</TD></TR><TR><TD>mobidb</TD><TD>https://mobidb.org/sitemap2.xml.gz</TD></TR></TABLE> " )
2022-06-07 15:12:46 +02:00
@GetMapping ( " /startScraping " )
2022-08-31 14:06:10 +02:00
public ScrapingExecution startScraping ( @Parameter ( name = " datasourceKey " ) @RequestParam final String datasourceKey ,
@Parameter ( name = " sitemapUrl " ) @RequestParam final String sitemapUrl ,
final HttpServletRequest req ) {
2022-06-07 13:02:36 +02:00
logger . info ( " <STARTSCRAPING> datasourceKey: " + datasourceKey + " sitemapUrl: " + sitemapUrl ) ;
2022-06-24 11:52:13 +02:00
return scrapingExecutor . startScraping ( datasourceKey , sitemapUrl , getOutputDataPattern ( ) , req . getRemoteAddr ( ) , getOutputFolder ( ) ) ;
2022-06-07 15:12:46 +02:00
}
2022-06-06 09:37:29 +02:00
2022-08-31 14:06:10 +02:00
@Operation ( summary = " check the status of last scraping operation " )
2022-06-07 15:12:46 +02:00
@GetMapping ( " /startScraping/status " )
public final ScrapingExecution statusScraping ( ) {
return scrapingExecutor . getLastScrapingExecution ( ) ;
2022-06-06 09:37:29 +02:00
}
2022-08-31 14:06:10 +02:00
@Operation ( summary = " retrieve the nquads downloaded for one specific provider " )
2022-06-07 13:02:36 +02:00
@RequestMapping ( value = " /getNQuads " , method = RequestMethod . GET )
2022-08-31 14:06:10 +02:00
public String getNQuads ( @Parameter ( name = " datasourceKey " ) @RequestParam final String datasourceKey , HttpServletResponse response ) throws BioschemasException , IOException {
2022-06-07 13:02:36 +02:00
logger . info ( " <GETNQUADS> datasourceKey: " + datasourceKey ) ;
LineIterator it = FileUtils . lineIterator ( new File ( getOutputFolder ( ) . concat ( " / " ) . concat ( datasourceKey ) . concat ( getOutputDataPattern ( ) ) ) , " UTF-8 " ) ;
2022-06-06 09:37:29 +02:00
try {
while ( it . hasNext ( ) ) {
String line = it . nextLine ( ) ;
response . getOutputStream ( ) . write ( line . getBytes ( StandardCharsets . UTF_8 ) ) ;
response . getOutputStream ( ) . println ( ) ;
}
} finally {
}
return " " ;
}
2022-06-07 13:02:36 +02:00
public String getOutputFolder ( ) {
return outputFolder ;
}
public String getOutputDataPattern ( ) {
return outputDataPattern ;
}
2022-06-24 11:52:13 +02:00
public void setOutputFolder ( String outputFolder ) {
this . outputFolder = outputFolder ;
}
public void setOutputDataPattern ( String outputDataPattern ) {
this . outputDataPattern = outputDataPattern ;
}
2022-06-06 09:37:29 +02:00
}