Kulya / OpnSense · Commits

Commit a03cc14f, authored Mar 01, 2016 by Ad Schellevis
(proxy) improve remote acl handling, sort and structure output for squid.

Parent: 78c3d912

Showing 1 changed file with 216 additions and 94 deletions.

src/opnsense/scripts/proxy/fetchACLs.py  (+216 / -94)
 #!/usr/local/bin/python2.7
 """
+    Copyright (c) 2016 Ad Schellevis - Deciso B.V.
     Copyright (c) 2015 Jos Schellevis - Deciso B.V.
     All rights reserved.
...
...
@@ -26,6 +27,7 @@
     POSSIBILITY OF SUCH DAMAGE.
 """
+import tempfile
 import urllib2
 import os
 import json
...
...
@@ -42,54 +44,65 @@ acl_config_fn = ('/usr/local/etc/squid/externalACLs.conf')
 acl_target_dir = ('/usr/local/etc/squid/acl')
 acl_max_timeout = 30

-class ACLDownload(object):
+class Downloader(object):
     """ Download helper
     """

     def __init__(self, url, timeout):
         """ init new
         :param url: source url
         :param timeout: timeout in seconds
         """
         self._url = url
         self._timeout = timeout
-        self._source_data = None
+        self._source_handle = None
-        self._target_data = None

     def fetch(self):
-        """ fetch (raw) source data into self._source_data
+        """ fetch (raw) source data into tempfile using self._source_handle
         """
         try:
             f = urllib2.urlopen(self._url, timeout=self._timeout)
-            self._source_data = f.read()
+            # flush to temp file
+            self._source_handle = tempfile.NamedTemporaryFile()
+            while True:
+                data = f.read(1024)
+                if not data:
+                    break
+                else:
+                    self._source_handle.write(data)
+            self._source_handle.seek(0)
+            f.close()
         except (urllib2.URLError, urllib2.HTTPError, IOError) as e:
             syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s' % self._url)
-            self._source_data = None
+            self._source_handle = None

     def get_files(self):
         """ process downloaded data, handle compression
-        :return: iterator filename, content
+        :return: iterator filename, file handle
         """
-        if self._source_data is not None:
+        if self._source_handle is not None:
             # handle compressed data
             if (len(self._url) > 8 and self._url[-7:] == '.tar.gz') \
                     or (len(self._url) > 4 and self._url[-4:] == '.tgz'):
                 # source is in tar.gz format, extract all into a single string
                 try:
-                    tf = tarfile.open(fileobj=StringIO.StringIO(self._source_data))
+                    tf = tarfile.open(fileobj=self._source_handle)
                     for tf_file in tf.getmembers():
                         if tf_file.isfile():
-                            yield tf_file.name, tf.extractfile(tf_file).read()
+                            yield tf_file.name, tf.extractfile(tf_file)
                 except IOError as e:
                     syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)' % (self._url, e))
             elif len(self._url) > 4 and self._url[-3:] == '.gz':
                 # source is in .gz format unpack
                 try:
-                    gf = gzip.GzipFile(mode='r', fileobj=StringIO.StringIO(self._source_data))
-                    yield os.path.basename(self._url), gf.read()
+                    gf = gzip.GzipFile(mode='r', fileobj=self._source_handle)
+                    yield os.path.basename(self._url), gf
                 except IOError as e:
                     syslog.syslog(syslog.LOG_ERR, 'proxy acl: error downloading %s (%s)' % (self._url, e))
             elif len(self._url) > 5 and self._url[-4:] == '.zip':
                 # source is in .zip format, extract all into a single string
-                target_data = dict()
-                with zipfile.ZipFile(StringIO.StringIO(self._source_data),
+                with zipfile.ZipFile(self._source_handle,
                                      mode='r',
                                      compression=zipfile.ZIP_DEFLATED) as zf:
                     for item in zf.infolist():
...
...
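All of the decompression branches above now hand the spooled temp file straight to the stdlib readers instead of wrapping an in-memory string. A minimal standalone sketch of that pattern (not part of the diff; the payload is made up), assuming the same Python 2.7 environment the script targets:

import gzip
import tempfile

# write a made-up gzip payload into a seekable temp file ...
tmp = tempfile.NamedTemporaryFile()
gz_out = gzip.GzipFile(mode='w', fileobj=tmp)
gz_out.write('example.com\n')
gz_out.close()

# ... rewind, then hand the same handle to a reader, as fetch()/get_files() do
tmp.seek(0)
gz_in = gzip.GzipFile(mode='r', fileobj=tmp)
print gz_in.read()  # -> 'example.com\n'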
@@ -97,20 +110,121 @@ class ACLDownload(object):
                         yield item.filename, zf.read(item)
-            self._target_data = target_data
             else:
-                yield os.path.basename(self._url), self._source_data
+                yield os.path.basename(self._url), self._source_handle

     def download(self):
         """ download / unpack ACL
         :return: iterator filename, type, content
         """
         self.fetch()
-        for filename, filedata in self.get_files():
-            for line in filedata.split('\n'):
-                if line.find('/') > -1:
-                    file_type = 'url'
-                else:
-                    file_type = 'domain'
-                yield filename, file_type, line
+        for filename, filehandle in self.get_files():
+            while True:
+                line = filehandle.readline()
+                if not line:
+                    break
+                yield filename, line
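A short usage sketch of the reworked downloader (not part of the commit; the URL is a placeholder and the snippet assumes it runs inside this module, where urllib2, tempfile and syslog are already imported):

# count entries per source file; lines are streamed from a spooled temp file,
# so a large blacklist no longer has to fit in memory as one string
acl = Downloader('https://lists.example.org/blacklist.tar.gz', acl_max_timeout)
line_count = {}
for filename, line in acl.download():
    line_count[filename] = line_count.get(filename, 0) + 1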
+class DomainSorter(object):
+    """ Helper class for building sorted squid domain acl list.
+        Use as file type object, close flushes the actual (sorted) data to disc
+    """
+    def __init__(self, filename=None, mode=None):
+        """ new sorted output file, uses an acl record in reverse order as sort key
+        :param filename: target filename
+        :param mode: file open mode
+        """
+        self._num_targets = 20
+        self._seperator = '|'
+        self._buckets = dict()
+        self._sort_map = dict()
+        # setup target
+        self._target_filename = filename
+        self._target_mode = mode
+        # setup temp files
+        self.generate_targets()
+
+    def generate_targets(self):
+        """ generate ordered targets
+        """
+        sets = 255
+        for i in range(sets):
+            target = chr(i + 1)
+            setid = int(i / (sets / self._num_targets))
+            if setid not in self._buckets:
+                self._buckets[setid] = tempfile.NamedTemporaryFile()
+            self._sort_map[target] = self._buckets[setid]
+
+    def write(self, data):
+        """ save content, send reverse sorted to buffers
+        :param data: line to write
+        """
+        line = data.strip()
+        if len(line) > 0:
+            self.add(line[::-1], line)
+
+    def add(self, key, value):
+        """ spool data to temp
+        :param key: key to use
+        :param value: value to store
+        """
+        target = key[0]
+        if target in self._sort_map:
+            self._sort_map[target].write('%s%s%s\n' % (key, self._seperator, value))
+        else:
+            # not supposed to happen, every key should have a calculated target pool
+            pass
+
+    def reader(self):
+        """ read reverse
+        """
+        for target in sorted(self._buckets):
+            self._buckets[target].seek(0)
+            set_content = dict()
+            while True:
+                line = self._buckets[target].readline()
+                if not line:
+                    break
+                else:
+                    set_content[line.split('|')[0]] = '|'.join(line.split('|')[1:])
+            for itemkey in sorted(set_content, reverse=True):
+                yield set_content[itemkey]
+
+    @staticmethod
+    def is_domain(tag):
+        """ check if tag is probably a domain name
+        :param tag: tag to inspect
+        :return: boolean
+        """
+        has_chars = False
+        for tag_item in tag:
+            if not tag_item.isdigit() and tag_item not in ('.', ',', '|', '/', '\n'):
+                has_chars = True
+            elif tag_item in (':', '|', '/'):
+                return False
+        if has_chars:
+            return True
+        else:
+            return False
+
+    def close(self):
+        """ close and dump content
+        """
+        if self._target_filename is not None and self._target_mode is not None:
+            # flush to file on close
+            with open(self._target_filename, self._target_mode) as f_out:
+                prev_line = None
+                for line in self.reader():
+                    line = line.lstrip('.')
+                    if prev_line == line:
+                        # duplicate, skip
+                        continue
+                    if self.is_domain(line):
+                        # prefix domain, but only if the chances are very small it will overlap
+                        if prev_line is None or line not in prev_line:
+                            f_out.write('.')
+                    f_out.write(line)
+                    prev_line = line
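A small illustration of what DomainSorter does with a handful of entries (not part of the diff; the output path and domains are invented). Because the sort key is the reversed record, related domains land in the same bucket and come out adjacent, and close() only prepends a dot when the previous entry does not already cover the new one:

sorter = DomainSorter('/tmp/example_domains.acl', 'wb')   # hypothetical output file
for entry in ('www.example.com', 'example.com', 'ads.example.net'):
    sorter.write(entry + '\n')
sorter.close()
# /tmp/example_domains.acl now contains, in reverse-key sorted order:
#   .ads.example.net
#   .www.example.com
#   example.com        (no leading dot: it is contained in the line written before it)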

 def filename_in_ignorelist(filename):
...
...
@@ -119,82 +233,90 @@ def filename_in_ignorelist(filename):
     """
     if (filename.lower().split('.')[-1] in ['pdf', 'txt', 'doc']):
         return True
-    elif (filename.lower() in ('readme', 'license')):
+    elif (filename.lower() in ('readme', 'license', 'usage', 'categories')):
         return True
     return False

-# parse OPNsense external ACLs config
-if os.path.exists(acl_config_fn):
-    # create acl directory (if new)
-    if not os.path.exists(acl_target_dir):
-        os.mkdir(acl_target_dir)
-    else:
-        # remove index files
-        for filename in glob.glob('%s/*.index' % acl_target_dir):
-            os.remove(filename)
-    # read config and download per section
-    cnf = ConfigParser()
-    cnf.read(acl_config_fn)
-    for section in cnf.sections():
-        target_filename = acl_target_dir + '/' + section
-        if cnf.has_option(section, 'url'):
-            # collect filters to apply
-            acl_filters = list()
-            if cnf.has_option(section, 'filter'):
-                for acl_filter in cnf.get(section, 'filter').strip().split(','):
-                    if len(acl_filter.strip()) > 0:
-                        acl_filters.append(acl_filter)
-            # define targets
-            targets = {'domain': {'filename': target_filename, 'handle': None},
-                       'url': {'filename': '%s.url' % target_filename, 'handle': None}}
-            # only generate files if enabled, otherwise dump empty files
-            if cnf.has_option(section, 'enabled') and cnf.get(section, 'enabled') == '1':
-                download_url = cnf.get(section, 'url')
-                acl = ACLDownload(download_url, acl_max_timeout)
-                all_filenames = list()
-                for filename, filetype, line in acl.download():
-                    if filename_in_ignorelist(os.path.basename(filename)):
-                        # ignore documents, licenses and readme's
-                        continue
-                    if filename not in all_filenames:
-                        all_filenames.append(filename)
-                    if len(acl_filters) > 0:
-                        acl_found = False
-                        for acl_filter in acl_filters:
-                            if filename.find(acl_filter) > -1:
-                                acl_found = True
-                                break
-                        if not acl_found:
-                            # skip this acl entry
-                            continue
-                    if filetype in targets and targets[filetype]['handle'] is None:
-                        targets[filetype]['handle'] = open(targets[filetype]['filename'], 'wb')
-                    if filetype in targets:
-                        targets[filetype]['handle'].write('%s\n' % line)
-                # save index to disc
-                with open('%s.index' % target_filename, 'wb') as idx_out:
-                    index_data = dict()
-                    for filename in all_filenames:
-                        if len(filename.split('/')) > 2:
-                            index_key = '/'.join(filename.split('/')[1:-1])
-                            if index_key not in index_data:
-                                index_data[index_key] = index_key
-                    idx_out.write(json.dumps(index_data))
-            # cleanup
-            for filetype in targets:
-                if targets[filetype]['handle'] is not None:
-                    targets[filetype]['handle'].close()
-                elif cnf.has_option(section, 'enabled') and cnf.get(section, 'enabled') != '1':
-                    if os.path.isfile(targets[filetype]['filename']):
-                        # disabled, remove previous data
-                        os.remove(targets[filetype]['filename'])
-                elif not os.path.isfile(targets[filetype]['filename']):
-                    # no data fetched and no file available, create new empty file
-                    with open(targets[filetype]['filename'], 'wb') as target_out:
-                        target_out.write("")
+def main():
+    # parse OPNsense external ACLs config
+    if os.path.exists(acl_config_fn):
+        # create acl directory (if new)
+        if not os.path.exists(acl_target_dir):
+            os.mkdir(acl_target_dir)
+        else:
+            # remove index files
+            for filename in glob.glob('%s/*.index' % acl_target_dir):
+                os.remove(filename)
+        # read config and download per section
+        cnf = ConfigParser()
+        cnf.read(acl_config_fn)
+        for section in cnf.sections():
+            target_filename = acl_target_dir + '/' + section
+            if cnf.has_option(section, 'url'):
+                # collect filters to apply
+                acl_filters = list()
+                if cnf.has_option(section, 'filter'):
+                    for acl_filter in cnf.get(section, 'filter').strip().split(','):
+                        if len(acl_filter.strip()) > 0:
+                            acl_filters.append(acl_filter)
+                # define target(s)
+                targets = {'domain': {'filename': target_filename, 'handle': None, 'class': DomainSorter}}
+                # only generate files if enabled, otherwise dump empty files
+                if cnf.has_option(section, 'enabled') and cnf.get(section, 'enabled') == '1':
+                    download_url = cnf.get(section, 'url')
+                    acl = Downloader(download_url, acl_max_timeout)
+                    all_filenames = list()
+                    for filename, line in acl.download():
+                        if filename_in_ignorelist(os.path.basename(filename)):
+                            # ignore documents, licenses and readme's
+                            continue
+                        # detect output type
+                        if '/' in line or '|' in line:
+                            file_type = 'url'
+                        else:
+                            file_type = 'domain'
+                        if filename not in all_filenames:
+                            all_filenames.append(filename)
+                        if len(acl_filters) > 0:
+                            acl_found = False
+                            for acl_filter in acl_filters:
+                                if acl_filter in filename:
+                                    acl_found = True
+                                    break
+                            if not acl_found:
+                                # skip this acl entry
+                                continue
+                        if file_type in targets and targets[file_type]['handle'] is None:
+                            targets[file_type]['handle'] = targets[file_type]['class'](targets[file_type]['filename'], 'wb')
+                        if file_type in targets:
+                            targets[file_type]['handle'].write('%s\n' % line)
+                    # save index to disc
+                    with open('%s.index' % target_filename, 'wb') as idx_out:
+                        index_data = dict()
+                        for filename in all_filenames:
+                            if len(filename.split('/')) > 2:
+                                index_key = '/'.join(filename.split('/')[1:-1])
+                                if index_key not in index_data:
+                                    index_data[index_key] = index_key
+                        idx_out.write(json.dumps(index_data))
+                # cleanup
+                for filetype in targets:
+                    if targets[filetype]['handle'] is not None:
+                        targets[filetype]['handle'].close()
+                    elif cnf.has_option(section, 'enabled') and cnf.get(section, 'enabled') != '1':
+                        if os.path.isfile(targets[filetype]['filename']):
+                            # disabled, remove previous data
+                            os.remove(targets[filetype]['filename'])
+                    elif not os.path.isfile(targets[filetype]['filename']):
+                        # no data fetched and no file available, create new empty file
+                        with open(targets[filetype]['filename'], 'wb') as target_out:
+                            target_out.write("")
+
+
+# execute downloader
+main()
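For reference, main() is driven entirely by sections in /usr/local/etc/squid/externalACLs.conf. Only the option names (url, filter, enabled) come from the code above; the section name and values below are invented for illustration:

[MyBlacklist]
enabled=1
url=https://lists.example.org/blacklist.tar.gz
filter=ads,tracker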