Commit a89c09bd authored by Ad Schellevis, committed by Franco Fichtner

(IDS) add support for compressed rule files

(cherry picked from commit fe81d2e8)
(cherry picked from commit 58ca0284)
parent 4b7951d3
@@ -29,12 +29,18 @@
"""
import syslog
import tarfile
import gzip
import zipfile
import shutil
import tempfile
import requests
class Downloader(object):
def __init__(self, target_dir):
self._target_dir = target_dir
self._download_cache = dict()
def filter(self, in_data, filter_type):
""" apply input filter to downloaded data
@@ -62,7 +68,50 @@ class Downloader(object):
                output.append(line)
        return '\n'.join(output)

    def download(self, proto, url, filename, input_filter):
    @staticmethod
    def _unpack(src, source_url, filename=None):
        """ unpack data if archived
        :param src: handle to temp file
        :param source_url: location where file was downloaded from
        :param filename: filename to extract
        :return: text
        """
        src.seek(0)
        source_url = source_url.strip().lower().split('?')[0]
        unpack_type = None
        if source_url.endswith('.tar.gz') or source_url.endswith('.tgz'):
            unpack_type = 'tar'
        elif source_url.endswith('.gz'):
            unpack_type = 'gz'
        elif source_url.endswith('.zip'):
            unpack_type = 'zip'
        if unpack_type is not None:
            rule_content = list()
            # handle compression types
            if unpack_type == 'tar':
                tf = tarfile.open(fileobj=src)
                for tf_file in tf.getmembers():
                    # extract partial or all (*.rules) from archive
                    if filename is not None and tf_file.name == filename:
                        rule_content.append(tf.extractfile(tf_file).read())
                    elif filename is None and tf_file.isfile() and tf_file.name.lower().endswith('.rules'):
                        rule_content.append(tf.extractfile(tf_file).read())
            elif unpack_type == 'gz':
                gf = gzip.GzipFile(mode='r', fileobj=src)
                rule_content.append(gf.read())
            elif unpack_type == 'zip':
                with zipfile.ZipFile(src, mode='r', compression=zipfile.ZIP_DEFLATED) as zf:
                    for item in zf.infolist():
                        if filename is not None and item.filename == filename:
                            rule_content.append(zf.open(item).read())
                        elif filename is None and item.file_size > 0 and item.filename.lower().endswith('.rules'):
                            rule_content.append(zf.open(item).read())
            return '\n'.join(rule_content)
        else:
            return src.read()

    def download(self, proto, url, url_filename, filename, input_filter):
        """ download ruleset file
        :param proto: protocol (http,https)
        :param url: download url
@@ -71,11 +120,20 @@
"""
if proto in ('http', 'https'):
frm_url = url.replace('//', '/').replace(':/', '://')
req = requests.get(url=frm_url)
if req.status_code == 200:
# stream to temp file
if frm_url not in self._download_cache:
req = requests.get(url=frm_url, stream=True)
if req.status_code == 200:
src = tempfile.NamedTemporaryFile()
shutil.copyfileobj(req.raw, src)
self._download_cache[frm_url] = src
# process rules from tempfile (prevent duplicate download for files within an archive)
if frm_url in self._download_cache:
try:
target_filename = '%s/%s' % (self._target_dir, filename)
save_data = self.filter(req.text, input_filter)
save_data = self._unpack(self._download_cache[frm_url], url, url_filename)
save_data = self.filter(save_data, input_filter)
open(target_filename, 'wb').write(save_data)
except IOError:
syslog.syslog(syslog.LOG_ERR, 'cannot write to %s' % target_filename)
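For reference, a minimal usage sketch of the reworked downloader (not part of the commit): the import path, target directory, URLs and member names below are made up, and passing input_filter=None assumes the filter step passes data through unchanged when no filter is configured, as in the Python 2 environment this code targets. Two files requested from the same archive URL should result in only one HTTP download, since the streamed response is kept as a temp file in _download_cache and _unpack runs against that cache entry per file.

# Hypothetical usage sketch -- paths, URLs and filenames are examples only.
from lib.downloader import Downloader

dl = Downloader(target_dir='/tmp/example.rules.d')

# Both calls reference the same .tar.gz bundle; the archive is fetched once
# (the requests stream is copied into a NamedTemporaryFile) and the cached
# temp file is unpacked twice, once per requested member.
dl.download(proto='https', url='https://example.com/rules/bundle.tar.gz',
            url_filename='rules/scan.rules', filename='scan.rules',
            input_filter=None)
dl.download(proto='https', url='https://example.com/rules/bundle.tar.gz',
            url_filename='rules/malware.rules', filename='malware.rules',
            input_filter=None)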
@@ -68,7 +68,12 @@ class Metadata(object):
                metadata_record['documentation_url'] = documentation_url
                metadata_record['source'] = src_location.attrib
                metadata_record['filename'] = rule_filename.text.strip()
                if 'url' in rule_filename.attrib:
                # for an archive, define file to extract
                metadata_record['url_filename'] = None
                if 'url' in rule_filename.attrib and rule_filename.attrib['url'].startswith('inline::'):
                    metadata_record['url'] = (metadata_record['source']['url'])
                    metadata_record['url_filename'] = rule_filename.attrib['url'][8:]
                elif 'url' in rule_filename.attrib:
                    metadata_record['url'] = (rule_filename.attrib['url'])
                else:
                    metadata_record['url'] = ('%s/%s' % (metadata_record['source']['url'],
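To make the new inline:: convention concrete, here is a small standalone illustration (not project code) of how the parser branch above maps a file entry to url and url_filename; the XML snippet, element names and source URL are invented for the example, they only mimic the attributes the parser reads.

# Standalone illustration of the 'inline::' handling above; the XML layout and
# URLs are assumptions for the example, not the project's metadata files.
import xml.etree.ElementTree as ET

snippet = '''
<files>
    <file url="inline::rules/emerging-exploit.rules">emerging-exploit.rules</file>
    <file>emerging-scan.rules</file>
</files>
'''
source_url = 'https://example.com/rules/emerging.rules.tar.gz'

for rule_filename in ET.fromstring(snippet):
    record = {'filename': rule_filename.text.strip(), 'url_filename': None}
    if rule_filename.attrib.get('url', '').startswith('inline::'):
        # archive entry: download the source url, extract the named member
        record['url'] = source_url
        record['url_filename'] = rule_filename.attrib['url'][8:]
    elif 'url' in rule_filename.attrib:
        # plain per-file download url
        record['url'] = rule_filename.attrib['url']
    else:
        # default: filename appended to the source url
        record['url'] = '%s/%s' % (source_url, record['filename'])
    print(record)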
@@ -78,5 +78,5 @@ if __name__ == '__main__':
                    pass
            else:
                input_filter = enabled_rulefiles[rule['filename']]['filter']
                dl.download(proto=download_proto, url=rule['url'],
                dl.download(proto=download_proto, url=rule['url'], url_filename=rule['url_filename'],
                            filename=rule['filename'], input_filter=input_filter)