Commit 932e2d69 authored by Ad Schellevis

(proxy) optimize acl download, split domains/urls and add index file

parent 2a1afe9e
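
The rework below turns ACLDownload into a streaming interface: instead of pre_process()/is_valid()/get_data() producing one concatenated blob, download() becomes a generator yielding a (filename, type, line) tuple per ACL entry, with type set to 'url' for lines containing a '/' and 'domain' otherwise. A minimal usage sketch under that contract; the URL and timeout are placeholders, not values from this commit:

    # hedged sketch of the new ACLDownload contract (placeholder source URL/timeout)
    acl = ACLDownload('http://example.com/blacklist.tar.gz', 30)
    for filename, file_type, line in acl.download():
        # file_type is 'url' when the line contains a '/', else 'domain'
        if file_type == 'domain':
            pass  # e.g. write to the per-section domain list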
@@ -28,6 +28,8 @@
 import urllib2
 import os
 import json
+import glob
+import os.path
 import tarfile
 import gzip
@@ -61,8 +63,9 @@ class ACLDownload(object):
             syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s'%self._url)
             self._source_data = None
 
-    def pre_process(self):
-        """ pre process downloaded data, handle compression
+    def get_files(self):
+        """ process downloaded data, handle compression
+        :return: iterator filename, content
         """
         if self._source_data is not None:
             # handle compressed data
@@ -71,49 +74,43 @@ class ACLDownload(object):
                 # source is in tar.gz format, extract all into a single string
                 try:
                     tf = tarfile.open(fileobj=StringIO.StringIO(self._source_data))
-                    target_data = []
                     for tf_file in tf.getmembers():
                         if tf_file.isfile():
-                            target_data.append(tf.extractfile(tf_file).read())
-                    self._target_data = ''.join(target_data)
+                            yield tf_file.name, tf.extractfile(tf_file).read()
                 except IOError as e:
                     syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)'%(self._url, e))
             elif len(self._url) > 4 and self._url[-3:] == '.gz':
                 # source is in .gz format unpack
                 try:
                     gf = gzip.GzipFile(mode='r', fileobj=StringIO.StringIO(self._source_data))
-                    self._target_data = gf.read()
+                    yield os.path.basename(self._url), gf.read()
                 except IOError as e:
                     syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)'%(self._url, e))
             elif len(self._url) > 5 and self._url[-4:] == '.zip':
                 # source is in .zip format, extract all into a single string
-                target_data = []
-                target_data = dict()
                 with zipfile.ZipFile(StringIO.StringIO(self._source_data),
                                      mode='r',
                                      compression=zipfile.ZIP_DEFLATED) as zf:
                     for item in zf.infolist():
-                        target_data.append(zf.read(item))
-                self._target_data = ''.join(target_data)
-                self._target_data = target_data
+                        if item.file_size > 0:
+                            yield item.filename, zf.read(item)
             else:
-                self._target_data = self._source_data
+                yield os.path.basename(self._url), self._source_data
 
     def download(self):
-        self.fetch()
-        self.pre_process()
-
-    def is_valid(self):
-        """ did this ACL download successful
+        """ download / unpack ACL
+        :return: iterator filename, type, content
         """
-        if self._target_data is not None:
-            return True
-        else:
-            return False
-
-    def get_data(self):
-        """ retrieve data
-        """
-        # XXX: maybe some postprocessing is needed here, all will be used with a squid dstdom_regex tag
-        return self._target_data
+        self.fetch()
+        for filename, filedata in self.get_files():
+            for line in filedata.split('\n'):
+                if line.find('/') > -1:
+                    file_type = 'url'
+                else:
+                    file_type = 'domain'
+                yield filename, file_type, line
 
 # parse OPNsense external ACLs config
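
The classification rule in download() is purely lexical: any line containing a '/' is treated as a URL pattern, everything else as a domain. A tiny worked example; the sample lines are illustrative, not from this commit:

    # minimal sketch of the line classification used by download() above
    for line in ['example.com', 'ads.example.net', 'example.org/banners/']:
        file_type = 'url' if line.find('/') > -1 else 'domain'
        # -> 'domain', 'domain', 'url'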
@@ -121,6 +118,10 @@ if os.path.exists(acl_config_fn):
     # create acl directory (if new)
     if not os.path.exists(acl_target_dir):
         os.mkdir(acl_target_dir)
+    else:
+        # remove index files
+        for filename in glob.glob('%s/*.index'%acl_target_dir):
+            os.remove(filename)
     # read config and download per section
     cnf = ConfigParser()
     cnf.read(acl_config_fn)
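
The new else branch wipes leftover *.index files on every run, so an index for a section that disappeared from the config cannot go stale. A standalone sketch of that cleanup, assuming an example ACL directory; the path is illustrative, not taken from this diff:

    import glob
    import os

    acl_target_dir = '/usr/local/etc/squid/acl'  # example location, not from this diff
    for stale_index in glob.glob('%s/*.index' % acl_target_dir):
        os.remove(stale_index)  # rebuilt further down for each enabled section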
@@ -129,22 +130,42 @@ if os.path.exists(acl_config_fn):
         if cnf.has_option(section,'enabled'):
             # if enabled fetch file
             target_filename = acl_target_dir+'/'+section
-            if cnf.get(section,'enabled')=='1':
-                if cnf.has_option(section,'url'):
+            if cnf.has_option(section,'url'):
+                # define targets
+                targets = {'domain': {'filename': target_filename, 'handle' : None},
+                           'url': {'filename': '%s.url'%target_filename, 'handle': None}}
+                # download file
+                if cnf.get(section,'enabled') == '1':
+                    # only generate files if enabled, otherwise dump empty files
                     download_url = cnf.get(section,'url')
                     acl = ACLDownload(download_url, acl_max_timeout)
-                    acl.download()
-                    if acl.is_valid():
-                        output_data = acl.get_data()
-                        with open(target_filename, "wb") as code:
-                            code.write(output_data)
-                    elif not os.path.isfile(target_filename):
-                        # if there's no file available, create an empty one (otherwise leave the last download).
-                        with open(target_filename, "wb") as code:
-                            code.write("")
-            # if disabled or not 1 try to remove old file
-            elif cnf.get(section,'enabled')!='1':
-                try:
-                    os.remove(acl_target_dir+'/'+section)
-                except OSError:
-                    pass
+                    all_filenames = list()
+                    for filename, filetype, line in acl.download():
+                        if filename not in all_filenames:
+                            all_filenames.append(filename)
+                        if filetype in targets and targets[filetype]['handle'] is None:
+                            targets[filetype]['handle'] = open(targets[filetype]['filename'], 'wb')
+                        if filetype in targets:
+                            targets[filetype]['handle'].write('%s\n'%line)
+                    # save index to disc
+                    with open('%s.index'%target_filename,'wb') as idx_out:
+                        index_data = dict()
+                        for filename in all_filenames:
+                            if len(filename.split('/')) > 3:
+                                index_key = '/'.join(filename.split('/')[1:-1])
+                                if index_key not in index_data:
+                                    index_data[index_key] = index_key
+                        idx_out.write(json.dumps(index_data))
+                # cleanup
+                for filetype in targets:
+                    if targets[filetype]['handle'] is not None:
+                        targets[filetype]['handle'].close()
+                    elif cnf.get(section,'enabled') != '1':
+                        if os.path.isfile(targets[filetype]['filename']):
+                            # disabled, remove previous data
+                            os.remove(targets[filetype]['filename'])
+                    elif not os.path.isfile(targets[filetype]['filename']):
+                        # no data fetched and no file available, create new empty file
+                        with open(targets[filetype]['filename'], 'wb') as target_out:
+                            target_out.write("")
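
After a run, each enabled section produces up to three files: '<section>' with domain entries, '<section>.url' with URL entries, and '<section>.index' holding a JSON object whose keys (and identical values) are the intermediate path components of archive members. For example, a tar member 'BL/hobby/games-online/domains' (more than three path components) yields the key 'hobby/games-online'. A hedged reader sketch; the directory and section name are examples, not from this commit:

    import json

    # example path and section name, purely illustrative
    with open('/usr/local/etc/squid/acl/mylist.index', 'rb') as idx_in:
        index_data = json.loads(idx_in.read())
    # e.g. {'hobby/games-online': 'hobby/games-online'}
    categories = sorted(index_data)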