Merge pull request #320 from GSA/iis-date-parser

Add another date parser for IIS servers
This commit is contained in:
Adrià Mercader 2023-11-10 15:35:47 +01:00 committed by GitHub
commit 7350dbd4bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 22 additions and 11 deletions

View File

@ -244,17 +244,26 @@ nginx = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress
,adjacent=False, joinString=' ').setResultsName('date')
)
# Grammar for one entry of an IIS-style directory listing: skip ahead to a
# "<br>", consume one or more "<br>" tokens, optionally capture a date, skip a
# run of digits, then capture the quoted target of '<A HREF=' as `url`.
# Everything except `date` and `url` is suppressed from the parse results.
# NOTE(review): `iis` is assigned again immediately after this statement, so
# as written this first assignment is overwritten.
iis = parse.SkipTo("<br>").suppress() \
+ parse.OneOrMore("<br>").suppress() \
+ parse.Optional(parse.Combine(
# Three whitespace-separated words, re-joined with single spaces as `date`:
#   1. letters/digits and '/'
#   2. letters/digits and ':'
#   3. letters only
# Presumably matches something like "11/10/2023 3:35 PM" -- TODO confirm
# against real listings.
parse.Word(parse.alphanums+'/') +
parse.Word(parse.alphanums+':') +
parse.Word(parse.alphas)
, adjacent=False, joinString=' ').setResultsName('date')
) \
+ parse.Word(parse.nums).suppress() \
+ parse.Literal('<A HREF=').suppress() \
+ parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url')
# Grammar for one entry of an IIS-style directory listing: skip ahead to a
# "<br>", consume one or more "<br>" tokens, optionally capture a date in one
# of two shapes, skip a run of digits, then capture the quoted target of
# '<A HREF=' as `url`. Everything except `date` and `url` is suppressed.
# Both optional date groups store their result under the same name, `date`.
iis = parse.SkipTo("<br>").suppress() \
+ parse.OneOrMore("<br>").suppress() \
+ parse.Optional(parse.Combine(
# Date shape 1 -- three whitespace-separated words joined with single spaces:
# letters/digits and '/', letters/digits and ':', then letters only.
# Presumably something like "11/10/2023 3:35 PM" -- TODO confirm.
parse.Word(parse.alphanums+'/') +
parse.Word(parse.alphanums+':') +
parse.Word(parse.alphas)
, adjacent=False, joinString=' ').setResultsName('date')
) \
+ parse.Optional(parse.Combine(
# Date shape 2 -- six whitespace-separated words joined with single spaces:
# letters and ',', letters, digits and ',', digits, digits and ':', letters.
# Presumably a long form such as "Friday, November 10, 2023 3:35 PM" --
# TODO confirm against real listings.
parse.Word(parse.alphas+',') +
parse.Word(parse.alphas) +
parse.Word(parse.nums+',') +
parse.Word(parse.nums) +
parse.Word(parse.nums+':') +
parse.Word(parse.alphas)
, adjacent=False, joinString=' ').setResultsName('date')
) \
+ parse.Word(parse.nums).suppress() \
+ parse.Literal('<A HREF=').suppress() \
+ parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url')
# Fallback grammar: skip up to and including a case-insensitive "<a href="
# (suppressed from the results), then capture the quoted string that follows,
# with its quotes removed, as `url`. No date is captured by this grammar.
other = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress() \
+ parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url')
@ -328,6 +337,8 @@ def _extract_waf(content, base_url, scraper, results = None, depth=0):
except Exception as e:
raise
date = None
if not date:
log.debug('failed to get date for %s', url)
results.append((urljoin(base_url, record.url), date))
return results