Commit 58ca0284 authored by Ad Schellevis

(IDS) extend support for compressed files with the "inline::" keyword, to allow downloading parts of an archive (required to download Snort rules).
Change the processing order to always stream data to temp files first, so downloaded files can be reused (an archive scan isn't very expensive).
parent fe81d2e8
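
For background: a rule file in the metadata can now point at a member of its source archive by prefixing its url attribute with "inline::". A minimal illustration of the convention (the helper name and urls are made up; only the prefix handling mirrors the metadata change below):

    def resolve_rule_location(source_url, file_url):
        """ resolve a rule file reference to (download url, archive member) """
        if file_url is not None and file_url.startswith('inline::'):
            # member of the source archive, strip the 'inline::' prefix (8 chars)
            return source_url, file_url[len('inline::'):]
        # plain file, download it directly
        return file_url, None

    # downloads the archive once, extracts rules/community.rules from it
    print(resolve_rule_location('https://example.com/rules.tar.gz', 'inline::rules/community.rules'))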
@@ -40,6 +40,7 @@ import requests
 class Downloader(object):
     def __init__(self, target_dir):
         self._target_dir = target_dir
+        self._download_cache = dict()

     def filter(self, in_data, filter_type):
         """ apply input filter to downloaded data
@@ -68,8 +69,15 @@ class Downloader(object):
         return '\n'.join(output)

     @staticmethod
-    def _unpack(req_obj, source_url):
-        source_url = source_url.strip().lower()
+    def _unpack(src, source_url, filename=None):
+        """ unpack data if archived
+        :param src: handle to temp file
+        :param source_url: location where file was downloaded from
+        :param filename: filename to extract
+        :return: text
+        """
+        src.seek(0)
+        source_url = source_url.strip().lower().split('?')[0]
         unpack_type=None
         if source_url.endswith('.tar.gz') or source_url.endswith('.tgz'):
             unpack_type = 'tar'
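
The added .split('?')[0] matters for download urls that carry a query string (for instance token-based links such as Snort's oinkcode urls): without it the endswith() checks never match. A standalone sketch of the detection logic, assuming the .gz and .zip branches in the collapsed lines follow the same pattern:

    def detect_unpack_type(source_url):
        """ derive the archive type from the url path, ignoring any query string """
        source_url = source_url.strip().lower().split('?')[0]
        if source_url.endswith('.tar.gz') or source_url.endswith('.tgz'):
            return 'tar'
        elif source_url.endswith('.gz'):
            return 'gz'
        elif source_url.endswith('.zip'):
            return 'zip'
        return None

    # a token in the query string no longer masks the archive suffix
    assert detect_unpack_type('https://example.com/rules.tar.gz?oinkcode=abc123') == 'tar'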
@@ -80,15 +88,14 @@ class Downloader(object):
         if unpack_type is not None:
             rule_content = list()
-            # flush to temp
-            src = tempfile.NamedTemporaryFile()
-            shutil.copyfileobj(req_obj.raw, src)
-            src.seek(0)
             # handle compression types
             if unpack_type == 'tar':
                 tf = tarfile.open(fileobj=src)
                 for tf_file in tf.getmembers():
-                    if tf_file.isfile() and tf_file.name.lower().endswith('.rules'):
+                    # extract partial or all (*.rules) from archive
+                    if filename is not None and tf_file.name == filename:
+                        rule_content.append(tf.extractfile(tf_file).read())
+                    elif filename is None and tf_file.isfile() and tf_file.name.lower().endswith('.rules'):
                         rule_content.append(tf.extractfile(tf_file).read())
             elif unpack_type == 'gz':
                 gf = gzip.GzipFile(mode='r', fileobj=src)
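
The selection rule in both archive branches is the same: with an explicit filename, only that member is extracted; without one, everything ending in .rules. A self-contained sketch of the tar case against a local file (path and member name are illustrative; bytes are joined because read() returns bytes on Python 3):

    import tarfile

    def extract_from_tar(tar_path, filename=None):
        """ one exact member when filename is given, else every *.rules file """
        rule_content = list()
        with tarfile.open(tar_path) as tf:
            for tf_file in tf.getmembers():
                if filename is not None and tf_file.name == filename:
                    rule_content.append(tf.extractfile(tf_file).read())
                elif filename is None and tf_file.isfile() and tf_file.name.lower().endswith('.rules'):
                    rule_content.append(tf.extractfile(tf_file).read())
        return b'\n'.join(rule_content)

    # partial extraction: only the named member is read from the archive
    # extract_from_tar('snortrules.tar.gz', filename='rules/community.rules')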
@@ -96,13 +103,15 @@ class Downloader(object):
             elif unpack_type == 'zip':
                 with zipfile.ZipFile(src, mode='r', compression=zipfile.ZIP_DEFLATED) as zf:
                     for item in zf.infolist():
-                        if item.file_size > 0 and item.filename.lower().endswith('.rules'):
+                        if filename is not None and item.filename == filename:
+                            rule_content.append(zf.open(item).read())
+                        elif filename is None and item.file_size > 0 and item.filename.lower().endswith('.rules'):
                             rule_content.append(zf.open(item).read())
             return '\n'.join(rule_content)
         else:
-            return req_obj.text
+            return src.read()

-    def download(self, proto, url, filename, input_filter):
+    def download(self, proto, url, url_filename, filename, input_filter):
         """ download ruleset file
         :param proto: protocol (http,https)
         :param url: download url
@@ -111,11 +120,19 @@ class Downloader(object):
         """
         if proto in ('http', 'https'):
             frm_url = url.replace('//', '/').replace(':/', '://')
-            req = requests.get(url=frm_url, stream=True)
-            if req.status_code == 200:
+            # stream to temp file
+            if frm_url not in self._download_cache:
+                req = requests.get(url=frm_url, stream=True)
+                if req.status_code == 200:
+                    src = tempfile.NamedTemporaryFile()
+                    shutil.copyfileobj(req.raw, src)
+                    self._download_cache[frm_url] = src
+            # process rules from tempfile (prevent duplicate download for files within an archive)
+            if frm_url in self._download_cache:
                 try:
                     target_filename = '%s/%s' % (self._target_dir, filename)
-                    save_data = self._unpack(req, url)
+                    save_data = self._unpack(self._download_cache[frm_url], url, url_filename)
                     save_data = self.filter(save_data, input_filter)
                     open(target_filename, 'wb').write(save_data)
                 except IOError:
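
The reordering means an archive referenced by several "inline::" entries is fetched only once: the response is streamed to a NamedTemporaryFile and the open handle kept in _download_cache, while _unpack() seeks back to the start on every reuse. A stripped-down sketch of the pattern (module-level cache instead of the instance attribute, no error handling):

    import shutil
    import tempfile
    import requests

    _download_cache = dict()

    def fetch(url):
        """ stream the url into a temp file once; later calls reuse the open handle """
        if url not in _download_cache:
            req = requests.get(url=url, stream=True)
            if req.status_code == 200:
                src = tempfile.NamedTemporaryFile()
                shutil.copyfileobj(req.raw, src)
                _download_cache[url] = src
        # consumers must seek(0) before reading, as _unpack() does above
        return _download_cache.get(url)

    # two rule files from the same archive trigger a single http request
    # fetch('https://example.com/rules.tar.gz')
    # fetch('https://example.com/rules.tar.gz')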
@@ -68,7 +68,12 @@ class Metadata(object):
                 metadata_record['documentation_url'] = documentation_url
                 metadata_record['source'] = src_location.attrib
                 metadata_record['filename'] = rule_filename.text.strip()
-                if 'url' in rule_filename.attrib:
+                # for an archive, define file to extract
+                metadata_record['url_filename'] = None
+                if 'url' in rule_filename.attrib and rule_filename.attrib['url'].startswith('inline::'):
+                    metadata_record['url'] = (metadata_record['source']['url'])
+                    metadata_record['url_filename'] = rule_filename.attrib['url'][8:]
+                elif 'url' in rule_filename.attrib:
                     metadata_record['url'] = (rule_filename.attrib['url'])
                 else:
                     metadata_record['url'] = ('%s/%s' % (metadata_record['source']['url'],
@@ -78,5 +78,5 @@ if __name__ == '__main__':
                 pass
             else:
                 input_filter = enabled_rulefiles[rule['filename']]['filter']
-                dl.download(proto=download_proto, url=rule['url'],
+                dl.download(proto=download_proto, url=rule['url'], url_filename=rule['url_filename'],
                             filename=rule['filename'], input_filter=input_filter)
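
Putting the pieces together, a hypothetical call as the updater would now make it, with made-up paths and an archive member selected via "inline::" (whether input_filter may be None depends on the filter table, which is not shown here):

    # illustrative values; the real ones come from the parsed metadata
    rule = {'url': 'https://example.com/snortrules.tar.gz',
            'url_filename': 'rules/community.rules',   # set by the 'inline::' branch
            'filename': 'community.rules'}
    dl = Downloader(target_dir='/tmp/rules')
    dl.download(proto='https', url=rule['url'], url_filename=rule['url_filename'],
                filename=rule['filename'], input_filter=None)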