# legacy_deps.py
"""
    Copyright (c) 2015 Ad Schellevis
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    1. Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.

    --------------------------------------------------------------------------------------
    Crawler class to find module (require/include) dependencies
"""
import os
import os.path

class DependancyCrawler(object):
    """ Legacy dependency crawler and grapher

    Collects, for every analysed file, the files it includes / requires and the functions
    it declares. The collected data can be queried (where_used, trace, file_info) or
    rendered (generate_dot, generate_index_html).
    """

    def __init__(self, root):
        """ init
        :param root: start crawling at
        :return:
        """
        # registered name -> list of filenames found in include / require statements
        self._all_dependencies = {}
        # source filename -> name the file is registered under in _all_dependencies
        self._all_dependencies_src = {}
        # registered name -> list of function names declared in that file
        self._all_functions = {}
        # include targets which are never recorded as a dependency
        self._exclude_deps = ['/usr/local/opnsense/mvc/app/config/config.php']
        self.root = root

    @staticmethod
    def _read_file(src_filename):
        """ read a complete file and make sure its handle is closed again
        :param src_filename: file to read
        :return: string, file content
        """
        with open(src_filename, 'r') as handle:
            return handle.read()

    @staticmethod
    def _registry_name(src_filename, registry):
        """ determine the name to register a file under
        :param src_filename: full filename
        :param registry: dictionary holding the names which are already in use
        :return: basename of the file, prefixed with its directory name when the basename is taken
        """
        base_filename = os.path.basename(src_filename)
        if base_filename in registry:
            parent_dir = os.path.basename(os.path.dirname(src_filename))
            base_filename = '%s__%s' % (parent_dir, base_filename)
        return base_filename

    @staticmethod
    def _is_php_script(src_filename):
        """ check if the first 1024 bytes of a file reference a php interpreter (/bin/php)
        :param src_filename: file to inspect
        :return: boolean
        """
        try:
            # read bytes, files without a known extension may very well be binaries
            with open(src_filename, 'rb') as handle:
                return b'/bin/php' in handle.read(1024)
        except (IOError, OSError):
            # unreadable (broken symlink, permissions), nothing to analyse in there
            return False

    def get_dependency_by_src(self, src_filename):
        """ dependencies are stored by a single name, this method maps a filename back to its name
                usually the basename of the file.
        :param src_filename:
        :return: registered name or None when the file was not analysed
        """
        return self._all_dependencies_src.get(src_filename)

    def fetch_php_modules(self, src_filename):
        """ find include, include_once, require and require_once statements in a file and
                register their (literal) targets as dependencies of that file.
        :param src_filename: file to analyse
        :return:
        """
        # create a new list for this base filename
        base_filename = self._registry_name(src_filename, self._all_dependencies)
        dependencies = []
        self._all_dependencies[base_filename] = dependencies
        self._all_dependencies_src[src_filename] = base_filename

        source_data = self._read_file(src_filename)
        # 'include' also matches include_once, 'require' also matches require_once
        for tag in ('include', 'require'):
            search_from = 0
            while True:
                startpos = source_data.find(tag, search_from)
                if startpos == -1:
                    break
                endpos = source_data.find(';', startpos)
                if endpos == -1:
                    # no statement terminator left, so there is nothing more to parse
                    break
                # continue searching after this statement on the next pass
                search_from = endpos
                # only consider keywords preceded by a space or newline, anything else
                # (including a keyword at the very start of the file) is not a statement
                if startpos == 0 or source_data[startpos - 1] not in (' ', '\n'):
                    continue
                dep_stmt = source_data[startpos:endpos].replace("'", '"')
                # parse (single line) statement containing exactly one quoted string
                if '\n' in dep_stmt or dep_stmt.count('"') != 2:
                    continue
                dep_filename = dep_stmt.split('"')[1]
                if dep_filename not in dependencies and dep_filename not in self._exclude_deps:
                    dependencies.append(dep_filename)

    def fetch_php_functions(self, src_filename):
        """ find php functions
        :param src_filename: file to analyse
        :return:
        """
        base_filename = self._registry_name(src_filename, self._all_functions)

        function_list = []
        for line in self._read_file(src_filename).split('\n'):
            if 'function ' not in line or '(' not in line:
                continue
            star_pos = line.find('*')
            if star_pos > -1 and line.find('function') > star_pos:
                # "function" is mentioned after a '*', skip this line
                continue
            # the function name is the last word in front of the first opening bracket
            function_list.append(line.split('(')[0].strip().split(' ')[-1].strip())

        self._all_functions[base_filename] = function_list

    def find_files(self, analyse_dirs=('etc', 'www', 'captiveportal', 'sbin')):
        """ find files to analyse, either by extension (php, inc, class) or by a reference
                to /bin/php in the first 1024 bytes of the file.
        :param analyse_dirs: directories to analyse
        :return: iterator
        """
        for analyse_dir in analyse_dirs:
            analyse_dir = ('%s/%s' % (self.root, analyse_dir)).replace('//', '/')
            for wroot, wdirs, wfiles in os.walk(analyse_dir):
                for src_filename in wfiles:
                    src_filename = '%s/%s' % (wroot, src_filename)
                    if src_filename.split('.')[-1] in ('php', 'inc', 'class') \
                            or self._is_php_script(src_filename):
                        yield src_filename

    def crawl(self):
        """ Crawl through legacy code, collects dependencies and functions of all files
                returned by find_files()
        :return: None
        """
        for src_filename in self.find_files():
            self.fetch_php_modules(src_filename)
            self.fetch_php_functions(src_filename)

    def where_used(self, src):
        """
        :param src: source object name (base name)
        :return: dictionary containing files and functions
        """
        where_used_lst = {}
        functions = self._all_functions.get(src, [])
        if not functions:
            # unknown source or no functions declared, no need to read any file
            return where_used_lst

        # contexts in which a function name counts as being used
        patterns = (' %s(', '!%s ', '!%s(', '(%s(', '(%s ', ' %s ')
        for src_filename in self.find_files():
            data = self._read_file(src_filename).replace('\n', ' ').replace('\t', ' ').replace('@', ' ')
            use_list = [function for function in functions
                        if any((pattern % function) in data for pattern in patterns)]
            if use_list:
                where_used_lst[src_filename] = sorted(use_list)

        return where_used_lst

    def get_total_files(self):
        """ get total number of analysed files
        :return: int
        """
        return len(self._all_dependencies)

    def get_total_dependencies(self):
        """ get total number of dependencies
        :return: int
        """
        return sum(len(dependencies) for dependencies in self._all_dependencies.values())

    def get_files(self):
        """ retrieve all analysed files as iterator (ordered by name)
        :return: iterator
        """
        for src_filename in sorted(self._all_dependencies):
            yield src_filename

    def trace(self, src_filename, parent_filename=None, result=None, level=0):
        """ trace dependencies (recursive)
        :param src_filename: name to start tracing at
        :param parent_filename: name of the file depending on src_filename (None for the start)
        :param result: dictionary to collect into, created when not provided
        :param level: depth of src_filename relative to the start
        :return: dictionary, per name the level and parent of its first occurrence and the
                 parents of all later occurrences (dup). None when src_filename was already in result
        """
        if result is None:
            result = {}
        if src_filename in result:
            # seen before, only register where the duplicate came from (this also ends cycles)
            result[src_filename]['dup'].append(parent_filename)
            return None

        result[src_filename] = {'level': level, 'dup': list(), 'parent': parent_filename}
        for dependency in self._all_dependencies.get(src_filename, ()):
            self.trace(dependency, src_filename, result, level=level+1)

        return result

    def file_info(self, src_filename):
        """ retrieve file info, like maximum recursive depth and number of duplicate dependencies
        :param src_filename:
        :return: dictionary containing levels and dup_count (both 0 for unknown files)
        """
        result = {'levels': 0, 'dup_count': 0}
        if src_filename in self._all_dependencies:
            for info in self.trace(src_filename).values():
                result['levels'] = max(result['levels'], info['level'])
                result['dup_count'] += len(info['dup'])

        return result

    def generate_dot(self, filename_to_inspect):
        """ convert trace data to dot graph
        :param filename_to_inspect: source filename to generate graph for
        :return: string (dot) data
        """
        trace_data = self.trace(filename_to_inspect)
        result = ['digraph dependencies {', '\toverlap=scale;']
        nodes = {}
        # output by level; sorted() is stable, so the order within a level is left untouched
        for src_filename in sorted(trace_data, key=lambda name: trace_data[name]['level']):
            info = trace_data[src_filename]
            if info['parent'] is None:
                # starting point of the trace
                nodes[src_filename] = '[shape=Mdiamond]'
                continue

            result.append('\tedge [color=black style=filled];')
            result.append('\t"%s" -> "%s" [weight=%d];' % (info['parent'], src_filename, info['level']))
            for target in info['dup']:
                result.append('\tedge [color=red style=dotted];')
                result.append('\t"%s" -> "%s";' % (target, src_filename))

            if info['dup']:
                nodes[src_filename] = '[shape=box,style=filled,color=".7 .3 1.0"]'
            else:
                nodes[src_filename] = '[shape=box]'

        for node in nodes:
            result.append('\t"%s" %s;' % (node, nodes[node]))

        result.append('}')
        return '\n'.join(result)

    @staticmethod
    def generate_index_html(filelist):
        """ generate a html page containing a table with a link to every given file
        :param filelist: names to link to, used unescaped as both link target and link text
        :return: string (html) data
        """
        html_body = "<html><head><title></title></head><body><table><tr><th>Name</th></tr>\n%s</table></body></html>"
        html_row = '<tr><td><a href="%s">%s</a></td></tr>\n'
        return html_body % '\n'.join(html_row % (name, name) for name in sorted(filelist))