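Changed log level from info to debug in BMUSEScraper; removed commented-out code and an unused local variable in ScrapingJob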

This commit is contained in:
Enrico Ottonello 2021-12-09 22:21:04 +01:00
parent 1ffc8d4945
commit 4797cc460b
2 changed files with 5 additions and 12 deletions

View File

@ -25,7 +25,6 @@ import eu.dnetlib.dhp.bmuse.utils.UrlParser;
public class ScrapingJob { public class ScrapingJob {
static Logger logger = LoggerFactory.getLogger(ScrapingJob.class); static Logger logger = LoggerFactory.getLogger(ScrapingJob.class);
// private static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z");
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
@ -50,9 +49,6 @@ public class ScrapingJob {
} }
final boolean scrapingType = dynamicValue.booleanValue(); final boolean scrapingType = dynamicValue.booleanValue();
// AtomicLong scraped = new AtomicLong(0l);
// AtomicLong errors = new AtomicLong(0l);
logger logger
.info( .info(
"*************************** STARTING_SCRAPE"); "*************************** STARTING_SCRAPE");
@ -60,7 +56,6 @@ public class ScrapingJob {
BMUSEScraper scraper = new BMUSEScraper(); BMUSEScraper scraper = new BMUSEScraper();
String url = sitemapUrl.toLowerCase(); String url = sitemapUrl.toLowerCase();
Elements urls = UrlParser.getSitemapList(url, sitemapURLKey); Elements urls = UrlParser.getSitemapList(url, sitemapURLKey);
long total = urls.size();
Path output = new Path( Path output = new Path(
nameNode nameNode
@ -91,10 +86,8 @@ public class ScrapingJob {
nquads = scraper.scrapeUrl(site, scrapingType); nquads = scraper.scrapeUrl(site, scrapingType);
final Text value = new Text(nquads); final Text value = new Text(nquads);
writer.append(key, value); writer.append(key, value);
// scraped.getAndIncrement();
} catch (Throwable t) { } catch (Throwable t) {
logger.error(u.text() + " " + t.getMessage()); logger.error(u.text() + " " + t.getMessage());
// errors.getAndIncrement();
} }
}); });
} }

View File

@ -31,7 +31,7 @@ public class BMUSEScraper extends ScraperFilteredCore {
private static final Logger logger = LoggerFactory.getLogger(BMUSEScraper.class.getName()); private static final Logger logger = LoggerFactory.getLogger(BMUSEScraper.class.getName());
public String scrapeUrl(String url, Boolean dynamic) throws Exception { public String scrapeUrl(String url, Boolean dynamic) throws Exception {
logger.info(url + " > scraping"); logger.debug(url + " > scraping");
url = fixURL(url); url = fixURL(url);
String html = ""; String html = "";
@ -49,24 +49,24 @@ public class BMUSEScraper extends ScraperFilteredCore {
html = injectId(html, url); html = injectId(html, url);
logger.info(url + " > html scraped from " + url); logger.debug(url + " > html scraped from " + url);
DocumentSource source = new StringDocumentSource(html, url); DocumentSource source = new StringDocumentSource(html, url);
String n3 = html2Triples(source, url); String n3 = html2Triples(source, url);
if (n3 == null) { if (n3 == null) {
throw new MissingMarkupException(url); throw new MissingMarkupException(url);
} }
logger.info(url + " > processing triples"); logger.debug(url + " > processing triples");
IRI sourceIRI = SimpleValueFactory.getInstance().createIRI(source.getDocumentIRI()); IRI sourceIRI = SimpleValueFactory.getInstance().createIRI(source.getDocumentIRI());
Model updatedModel = updatedModel = processTriples(n3, sourceIRI, 0l); Model updatedModel = updatedModel = processTriples(n3, sourceIRI, 0l);
if (updatedModel == null) { if (updatedModel == null) {
throw new Exception("rdf model null"); throw new Exception("rdf model null");
} }
logger.info(url + " > generating nquads"); logger.debug(url + " > generating nquads");
try (StringWriter jsonLDWriter = new StringWriter()) { try (StringWriter jsonLDWriter = new StringWriter()) {
Rio.write(updatedModel, jsonLDWriter, RDFFormat.NQUADS); Rio.write(updatedModel, jsonLDWriter, RDFFormat.NQUADS);
logger.info(url + " > nquads generated"); logger.debug(url + " > nquads generated");
return jsonLDWriter.toString(); return jsonLDWriter.toString();
} catch (Exception e) { } catch (Exception e) {
throw e; throw e;