Commit a206e64b authored by Ad Schellevis's avatar Ad Schellevis

(netflow/flowd) data aggregator, initial version / concept

parent 0c358e0d
#!/usr/local/bin/python2.7
"""
Copyright (c) 2016 Ad Schellevis
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------------
Aggregate flowd data for reporting
"""
import time
import datetime
import os
import sys
from lib.parse import parse_flow
from lib.aggregate import BaseFlowAggregator, AggMetadata
import lib.aggregates
def main():
    """ Aggregate flowd data for reporting.

    Streams parsed flow records into every registered aggregator (one
    instance per resolution) and commits collected data whenever the
    receive timestamp advances, recording progress so a next run can
    resume where this one stopped.
    """
    # init metadata (progress maintenance)
    metadata = AggMetadata()

    # register aggregate classes to stream data to, one instance per resolution
    stream_agg_objects = list()
    resolutions = [60, 60 * 5]
    for agg_class in lib.aggregates.get_aggregators():
        for resolution in resolutions:
            stream_agg_objects.append(agg_class(resolution))

    # parse flow data and stream to registered consumers
    prev_recv = metadata.last_sync()
    for flow_record in parse_flow(prev_recv):
        if flow_record is None or prev_recv != flow_record['recv']:
            # commit data on receive timestamp change or last record
            # (parse_flow yields None as an end-of-stream marker)
            for stream_agg_object in stream_agg_objects:
                stream_agg_object.commit()
            metadata.update_sync_time(prev_recv)
        if flow_record is not None:
            # send to all registered aggregators
            for stream_agg_object in stream_agg_objects:
                stream_agg_object.add(flow_record)
            prev_recv = flow_record['recv']


if __name__ == '__main__':
    main()
......@@ -24,13 +24,56 @@
POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------------
aggregate flow data (format in parse.py) into sqlite structured container, using a table per resolution and a
file per collection.
aggregate flow data (format in parse.py) into sqlite structured container per type/resolution.
Implementations are collected in lib\aggregates\
"""
import os
import datetime
import sqlite3
class AggMetadata(object):
    """ Store metadata needed to keep track of parse progress
        (last processed receive timestamp) in a small sqlite database.
    """
    def __init__(self):
        # fixed location of the metadata database
        self._filename = '/var/netflow/metadata.sqlite'
        # make sure the target directory exists
        target_path = os.path.dirname(self._filename)
        if not os.path.isdir(target_path):
            os.makedirs(target_path)
        # open sqlite database and cursor
        self._db_connection = sqlite3.connect(self._filename)
        self._db_cursor = self._db_connection.cursor()
        # cache of known table names
        self._tables = list()
        self._update_known_tables()

    def _update_known_tables(self):
        """ Refresh the cached list of tables known in this database.
        """
        # rebuild the cache from scratch, appending to the existing list
        # would accumulate duplicates on repeated calls
        self._tables = list()
        # only tables; indexes/views in sqlite_master are not relevant here
        self._db_cursor.execute("SELECT name FROM sqlite_master where type = 'table'")
        for record in self._db_cursor.fetchall():
            self._tables.append(record[0])

    def update_sync_time(self, timestamp):
        """ Update (last) sync timestamp; only moves forward in time.
        :param timestamp: receive timestamp (epoch)
        :return: None
        """
        if 'sync_timestamp' not in self._tables:
            # first use: create bookkeeping table with a single seed row
            self._db_cursor.execute('create table sync_timestamp(mtime timestamp)')
            self._db_cursor.execute('insert into sync_timestamp(mtime) values(0)')
            self._db_connection.commit()
            self._update_known_tables()
        # update last sync timestamp, only if the given timestamp is more recent
        self._db_cursor.execute('update sync_timestamp set mtime = :mtime where mtime < :mtime',
                                {'mtime': timestamp})
        self._db_connection.commit()

    def last_sync(self):
        """ Fetch the last known sync timestamp.
        :return: timestamp (0.0 when nothing was synced yet)
        """
        if 'sync_timestamp' not in self._tables:
            return 0.0
        else:
            self._db_cursor.execute('select max(mtime) from sync_timestamp')
            return self._db_cursor.fetchall()[0][0]
class BaseFlowAggregator(object):
# target location ('/var/netflow/<store>.sqlite')
......@@ -43,19 +86,15 @@ class BaseFlowAggregator(object):
"""
self.resolution = resolution
# target table name, data_<resolution in seconds>
self._target_table_name = 'data_%d' % self.resolution
self._db_connection = None
self._update_cur = None
self._known_targets = list()
# construct update and insert sql statements
tmp = 'update %s set octets = octets + :octets_consumed, packets = packets + :packets_consumed '
tmp = 'update timeserie set octets = octets + :octets_consumed, packets = packets + :packets_consumed '
tmp += 'where mtime = :mtime and %s '
self._update_stmt = tmp % (self._target_table_name,
' and '.join(map(lambda x: '%s = :%s' % (x, x), self.agg_fields)))
tmp = 'insert into %s (mtime, octets, packets, %s) values (:mtime, :octets_consumed, :packets_consumed, %s)'
self._insert_stmt = tmp % (self._target_table_name,
','.join(self.agg_fields),
','.join(map(lambda x: ':%s' % x, self.agg_fields)))
self._update_stmt = tmp % (' and '.join(map(lambda x: '%s = :%s' % (x, x), self.agg_fields)))
tmp = 'insert into timeserie (mtime, octets, packets, %s) values (:mtime, :octets_consumed, :packets_consumed, %s)'
self._insert_stmt = tmp % (','.join(self.agg_fields), ','.join(map(lambda x: ':%s' % x, self.agg_fields)))
# open database
self._open_db()
self._fetch_known_targets()
......@@ -77,7 +116,7 @@ class BaseFlowAggregator(object):
if self._db_connection is not None:
# construct new aggregate table
sql_text = list()
sql_text.append('create table %s ( ' % self._target_table_name)
sql_text.append('create table timeserie ( ')
sql_text.append(' mtime timestamp')
for agg_field in self.agg_fields:
sql_text.append(', %s varchar(255)' % agg_field)
......@@ -104,11 +143,11 @@ class BaseFlowAggregator(object):
"""
if self.target_filename is not None:
# make sure the target directory exists
target_path = os.path.dirname(self.target_filename)
target_path = os.path.dirname(self.target_filename % self.resolution)
if not os.path.isdir(target_path):
os.makedirs(target_path)
# open sqlite database
self._db_connection = sqlite3.connect(self.target_filename)
self._db_connection = sqlite3.connect(self.target_filename % self.resolution)
# open update/insert cursor
self._update_cur = self._db_connection.cursor()
......@@ -123,7 +162,7 @@ class BaseFlowAggregator(object):
:param flow: flow data (from parse.py)
"""
# make sure target exists
if self._target_table_name not in self._known_targets:
if 'timeserie' not in self._known_targets:
self._create_target_table()
# push record(s) depending on resolution
......@@ -139,7 +178,7 @@ class BaseFlowAggregator(object):
# upsert data
flow['octets_consumed'] = consume_perc * flow['octets']
flow['packets_consumed'] = consume_perc * flow['packets']
flow['mtime'] = datetime.datetime.utcfromtimestamp(start_time)
flow['mtime'] = datetime.datetime.fromtimestamp(start_time)
self._update_cur.execute(self._update_stmt, flow)
if self._update_cur.rowcount == 0:
self._update_cur.execute(self._insert_stmt, flow)
......
"""
Copyright (c) 2016 Ad Schellevis
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------------
data aggregator loader
"""
import sys
import os
import glob
from lib.aggregate import BaseFlowAggregator
def get_aggregators():
    """ Collect and return available aggregators in this package.
        A class qualifies when it subclasses BaseFlowAggregator and defines
        a non-empty target_filename (the base class itself does not).
    :return: list of class references
    """
    result = list()
    for filename in glob.glob('%s/*.py' % os.path.dirname(__file__)):
        filename_base = os.path.basename(filename)
        # skip __init__.py and other dunder modules
        if filename_base[0:2] != '__':
            module_name = 'lib.aggregates.%s' % '.'.join(filename_base.split('.')[:-1])
            __import__(module_name)
            for clsname in dir(sys.modules[module_name]):
                clshandle = getattr(sys.modules[module_name], clsname)
                # isinstance(..., type) matches any new-style class, including
                # classes using a custom metaclass, which type(..) == type misses
                if isinstance(clshandle, type) and issubclass(clshandle, BaseFlowAggregator):
                    # only concrete aggregators set a target_filename
                    if getattr(clshandle, 'target_filename', None) is not None:
                        result.append(clshandle)
    return result
"""
Copyright (c) 2016 Ad Schellevis
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------------
data aggregator type
"""
from lib.aggregate import BaseFlowAggregator
class FlowInterfaceTotals(BaseFlowAggregator):
    """ collect interface totals

        Aggregates flow data grouped per input/output interface pair;
        storage and upsert behavior come from BaseFlowAggregator.
    """
    # sqlite file per resolution, e.g. /var/netflow/interface_000060.sqlite
    target_filename = '/var/netflow/interface_%06d.sqlite'
    # flow record fields this aggregator groups on
    agg_fields = ['if_in', 'if_out']

    def __init__(self, resolution):
        """ :param resolution: aggregation resolution in seconds """
        super(FlowInterfaceTotals, self).__init__(resolution)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment