Commit 932e2d69 authored by Ad Schellevis

(proxy) optimize acl download, split domains/urls and add index file

parent 2a1afe9e
@@ -28,6 +28,8 @@
 import urllib2
 import os
+import json
+import glob
 import os.path
 import tarfile
 import gzip
@@ -61,8 +63,9 @@ class ACLDownload(object):
             syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s'%self._url)
             self._source_data = None

-    def pre_process(self):
-        """ pre process downloaded data, handle compression
+    def get_files(self):
+        """ process downloaded data, handle compression
+            :return: iterator filename, content
         """
         if self._source_data is not None:
             # handle compressed data
@@ -71,49 +74,43 @@ class ACLDownload(object):
                 # source is in tar.gz format, extract all into a single string
                 try:
                     tf = tarfile.open(fileobj=StringIO.StringIO(self._source_data))
-                    target_data = []
                     for tf_file in tf.getmembers():
                         if tf_file.isfile():
-                            target_data.append(tf.extractfile(tf_file).read())
-                    self._target_data = ''.join(target_data)
+                            yield tf_file.name, tf.extractfile(tf_file).read()
                 except IOError as e:
                     syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)'%(self._url, e))
             elif len(self._url) > 4 and self._url[-3:] == '.gz':
                 # source is in .gz format unpack
                 try:
                     gf = gzip.GzipFile(mode='r', fileobj=StringIO.StringIO(self._source_data))
-                    self._target_data = gf.read()
+                    yield os.path.basename(self._url), gf.read()
                 except IOError as e:
                     syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)'%(self._url, e))
             elif len(self._url) > 5 and self._url[-4:] == '.zip':
                 # source is in .zip format, extract all into a single string
-                target_data = []
+                target_data = dict()
                 with zipfile.ZipFile(StringIO.StringIO(self._source_data),
                                      mode='r',
                                      compression=zipfile.ZIP_DEFLATED) as zf:
                     for item in zf.infolist():
-                        target_data.append(zf.read(item))
-                self._target_data = ''.join(target_data)
+                        if item.file_size > 0:
+                            yield item.filename, zf.read(item)
+                self._target_data = target_data
             else:
-                self._target_data = self._source_data
+                yield os.path.basename(self._url), self._source_data

     def download(self):
-        self.fetch()
-        self.pre_process()
-
-    def is_valid(self):
-        """ did this ACL download successful
-        """
-        if self._target_data is not None:
-            return True
-        else:
-            return False
-
-    def get_data(self):
-        """ retrieve data
+        """ download / unpack ACL
+            :return: iterator filename, type, content
         """
-        # XXX: maybe some postprocessing is needed here, all will be used with a squid dstdom_regex tag
-        return self._target_data
+        self.fetch()
+        for filename, filedata in self.get_files():
+            for line in filedata.split('\n'):
+                if line.find('/') > -1:
+                    file_type = 'url'
+                else:
+                    file_type = 'domain'
+                yield filename, file_type, line


 # parse OPNsense external ACLs config
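Note: get_files() and download() are now generators, so callers iterate the yielded entries instead of calling is_valid()/get_data(). A minimal sketch of how the reworked interface is consumed (the URL, timeout value and print statement below are illustrative placeholders, not part of this commit):

    # hypothetical caller of the ACLDownload class changed above
    acl = ACLDownload('http://example.com/blacklist.tar.gz', 30)
    for filename, file_type, line in acl.download():
        # lines containing a '/' are yielded as 'url', all others as 'domain'
        print filename, file_type, line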
@@ -121,6 +118,10 @@ if os.path.exists(acl_config_fn):
     # create acl directory (if new)
     if not os.path.exists(acl_target_dir):
         os.mkdir(acl_target_dir)
+    else:
+        # remove index files
+        for filename in glob.glob('%s/*.index'%acl_target_dir):
+            os.remove(filename)
     # read config and download per section
     cnf = ConfigParser()
     cnf.read(acl_config_fn)
@@ -129,22 +130,42 @@ if os.path.exists(acl_config_fn):
         if cnf.has_option(section,'enabled'):
             # if enabled fetch file
             target_filename = acl_target_dir+'/'+section
-            if cnf.get(section,'enabled')=='1':
-                if cnf.has_option(section,'url'):
+            if cnf.has_option(section,'url'):
+                # define targets
+                targets = {'domain': {'filename': target_filename, 'handle' : None},
+                           'url': {'filename': '%s.url'%target_filename, 'handle': None}}
+                # download file
+                if cnf.get(section,'enabled') == '1':
+                    # only generate files if enabled, otherwise dump empty files
                     download_url = cnf.get(section,'url')
                     acl = ACLDownload(download_url, acl_max_timeout)
-                    acl.download()
-                    if acl.is_valid():
-                        output_data = acl.get_data()
-                        with open(target_filename, "wb") as code:
-                            code.write(output_data)
-                    elif not os.path.isfile(target_filename):
-                        # if there's no file available, create an empty one (otherwise leave the last download).
-                        with open(target_filename, "wb") as code:
-                            code.write("")
-            # if disabled or not 1 try to remove old file
-            elif cnf.get(section,'enabled')!='1':
-                try:
-                    os.remove(acl_target_dir+'/'+section)
-                except OSError:
-                    pass
+                    all_filenames = list()
+                    for filename, filetype, line in acl.download():
+                        if filename not in all_filenames:
+                            all_filenames.append(filename)
+                        if filetype in targets and targets[filetype]['handle'] is None:
+                            targets[filetype]['handle'] = open(targets[filetype]['filename'], 'wb')
+                        if filetype in targets:
+                            targets[filetype]['handle'].write('%s\n'%line)
+                    # save index to disc
+                    with open('%s.index'%target_filename,'wb') as idx_out:
+                        index_data = dict()
+                        for filename in all_filenames:
+                            if len(filename.split('/')) > 3:
+                                index_key = '/'.join(filename.split('/')[1:-1])
+                                if index_key not in index_data:
+                                    index_data[index_key] = index_key
+                        idx_out.write(json.dumps(index_data))
+                # cleanup
+                for filetype in targets:
+                    if targets[filetype]['handle'] is not None:
+                        targets[filetype]['handle'].close()
+                    elif cnf.get(section,'enabled') != '1':
+                        if os.path.isfile(targets[filetype]['filename']):
+                            # disabled, remove previous data
+                            os.remove(targets[filetype]['filename'])
+                    elif not os.path.isfile(targets[filetype]['filename']):
+                        # no data fetched and no file available, create new empty file
+                        with open(targets[filetype]['filename'], 'wb') as target_out:
+                            target_out.write("")
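Note: each enabled section now produces a domain list, a url list and a JSON index of the category paths seen inside the downloaded archive. A short sketch of reading those artifacts back; the directory, section name and archive member path below are assumptions for illustration, not values taken from this commit:

    import json

    acl_dir = '/usr/local/etc/squid/acl'   # assumed value of acl_target_dir
    section = 'shallalist'                 # hypothetical config section name
    # <section>       -> lines classified as 'domain' (no '/')
    # <section>.url   -> lines classified as 'url' (containing a '/')
    # <section>.index -> JSON object keyed by category path, e.g. a member named
    #                    'BL/hobby/games/domains' is stored under 'hobby/games'
    with open('%s/%s.index' % (acl_dir, section), 'rb') as idx_in:
        index_data = json.loads(idx_in.read())
    print index_data.keys()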