python/xrelfo: the ELF xref extractor

This creates JSON dumps of all the xref structs littered around FRR. Signed-off-by: David Lamparter <equinox@diac24.net>
2020-04-30 21:33:58 +02:00 · 2020-04-30 21:33:58 +02:00 · 36a8fdfd74
parent 5609b3af49
commit 36a8fdfd74
9 changed files with 1516 additions and 0 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -187,8 +187,16 @@ EXTRA_DIST += \
 	\
 	python/clidef.py \
 	python/clippy/__init__.py \
 	python/clippy/elf.py \
 	python/clippy/uidhash.py \
 	python/makevars.py \
 	python/makefile.py \
 	python/tiabwarfo.py \
 	python/xrelfo.py \
 	python/test_xrelfo.py \
 	python/runtests.py \
 	\
 	python/xrefstructs.json \
 	\
 	redhat/frr.logrotate \
 	redhat/frr.pam \
--- a/python/clippy/__init__.py
+++ b/python/clippy/__init__.py
@ -21,6 +21,8 @@ import _clippy
 from _clippy import parse, Graph, GraphNode
 frr_top_src = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 def graph_iterate(graph):
    """iterator yielding all nodes of a graph
--- a/python/clippy/elf.py
+++ b/python/clippy/elf.py
@ -0,0 +1,574 @@
 # FRR libelf wrapper
 #
 # Copyright (C) 2020  David Lamparter for NetDEF, Inc.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 # more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; see the file COPYING; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 '''
 Wrapping layer and additional utility around _clippy.ELFFile.
 Essentially, the C bits have the low-level ELF access bits that should be
 fast while this has the bits that string everything together (and would've
 been a PITA to do in C.)
 Surprisingly - or maybe through proper engineering - this actually works
 across architecture, word size and even endianness boundaries.  Both the C
 module (through GElf_*) and this code (cf. struct.unpack format mangling
 in ELFDissectStruct) will take appropriate measures to flip and resize
 fields as needed.
 '''
 import struct
 from collections import OrderedDict
 from weakref import WeakValueDictionary
 from _clippy import ELFFile, ELFAccessError
 #
 # data access
 #
 class ELFNull(object):
    '''
    Stand-in for a NULL pointer; handed out in place of an ELFData.
    Exposes ``symname`` and ``_dstsect`` like the other pointer result
    classes in this module, both fixed to None.
    '''
    def __init__(self):
        # a NULL pointer has neither a destination section nor a symbol
        self._dstsect = None
        self.symname = None
    def __hash__(self):
        '''
        Every NULL pointer hashes like ``None``.
        '''
        return hash(None)
    def __repr__(self):
        return '<ptr: %s>' % ('NULL',)
    def get_string(self):
        '''
        String behind a NULL pointer: there is none, so this is always None.
        '''
        return None
 class ELFUnresolved(object):
    '''
    Pointer to an external symbol that is not resolved; handed out in place
    of an ELFData.
    :param symname: name of the referenced symbol
    :param addend:  offset added to the symbol, normally zero
    '''
    def __init__(self, symname, addend):
        self.symname, self.addend = symname, addend
        # nothing to point into, so there is no destination section
        self._dstsect = None
    def __hash__(self):
        '''
        Hash over symbol name and addend.
        '''
        key = (self.symname, self.addend)
        return hash(key)
    def __repr__(self):
        return '<unresolved: %s+%d>' % (self.symname, self.addend)
 class ELFData(object):
    '''
    Actual data somewhere in the ELF file.
    :type dstsect:  ELFSubset
    :param dstsect: container data area (section or entire file)
    :param dstoffs: byte offset into dstsect
    :param dstlen:  byte size of object, or None if unknown, open-ended or string
    '''
    def __init__(self, dstsect, dstoffs, dstlen):
        self._dstsect = dstsect
        self._dstoffs = dstoffs
        self._dstlen = dstlen
        self.symname = None
    def __repr__(self):
        # -1 is shown only when the length is really unknown; a known
        # length of 0 is printed as 0
        dstlen = -1 if self._dstlen is None else self._dstlen
        return '<ptr: %s+0x%05x/%d>' % (self._dstsect.name, self._dstoffs, dstlen)
    def __hash__(self):
        return hash((self._dstsect, self._dstoffs))
    def get_string(self):
        '''
        Interpret as C string / null terminated UTF-8 and get the actual text.
        :returns: the decoded text
        :raises UnicodeDecodeError: if the bytes are not valid UTF-8.  Errors
            raised by the container while reading are passed through as well
            (this used to drop into an interactive debugger instead.)
        '''
        # a slice ending in the ``str`` type asks the container to read up
        # to the terminating null byte
        return self._dstsect[self._dstoffs:str].decode('UTF-8')
    def get_data(self, reflen):
        '''
        Interpret as some structure (and check vs. expected length)
        :param reflen: expected size of the object, compared against actual
            size (which is only known in rare cases, mostly when directly
            accessing a symbol since symbols have their destination object
            size recorded)
        :returns: ``reflen`` bytes sliced out of the container at this offset
        :raises ValueError: if the size is known and differs from ``reflen``
        '''
        if self._dstlen is not None and self._dstlen != reflen:
            raise ValueError('symbol size mismatch (got %d, expected %d)' % (self._dstlen, reflen))
        return self._dstsect[self._dstoffs:self._dstoffs+reflen]
    def offset(self, offs, within_symbol=False):
        '''
        Get another ELFData at an offset
        :param offs:          byte offset, can be negative (e.g. in container_of)
        :param within_symbol: retain length information
        :returns: new ELFData in the same container; its length is the
            remaining length if ``within_symbol`` is set and the length is
            known, None otherwise
        '''
        if self._dstlen is None or not within_symbol:
            return ELFData(self._dstsect, self._dstoffs + offs, None)
        else:
            return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs)
 #
 # dissection data items
 #
 class ELFDissectData(object):
    '''
    Common bits for ELFDissectStruct and ELFDissectUnion
    '''
    def __len__(self):
        '''
        Used for boolean evaluation, e.g. "if struct: ..."
        False when the underlying pointer is NULL or unresolved.
        '''
        return not isinstance(self._data, (ELFNull, ELFUnresolved))
    def container_of(self, parent, fieldname):
        '''
        Assume this struct is embedded in a larger struct and get at the larger
        Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``
        :param parent:    class (not instance) of the larger struct
        :param fieldname: fieldname that refers back to this
        :returns:         instance of parent, with fieldname set to this object
        :raises AttributeError: if parent has no field named ``fieldname``
        '''
        if not hasattr(parent, '_efields'):
            parent._setup_efields()
        # pointers must be sized for the ELF file being read; a bare 'P'
        # handed to struct.calcsize would use the pointer size of the host
        # running this code, which is wrong e.g. for 32-bit ELF on 64-bit
        ptrtype = 'I' if self.elfclass == 32 else 'Q'
        offset = 0
        for field in parent._efields[self.elfclass]:
            if field[0] == fieldname:
                break
            spec = field[1]
            if spec == 'P':
                spec = ptrtype
            offset += struct.calcsize(spec)
        else:
            raise AttributeError('%r not found in %r.fields' % (fieldname, parent))
        return parent(self._data.offset(-offset), replace = {fieldname: self})
 class ELFDissectStruct(ELFDissectData):
    '''
    Decode and provide access to a struct somewhere in the ELF file
    Handles pointers and strings somewhat nicely.  Create a subclass for each
    struct that is to be accessed, and give a field list in a "fields"
    class-member.
    :param dataptr: ELFData referring to the data bits to decode.
    :param parent:  where this was instantiated from; only for reference, has
        no functional impact.
    :param replace: substitute data values for specific fields.  Used by
        `container_of` to replace the inner struct when creating the outer
        one.
    .. attribute:: fields
       List of tuples describing the struct members.  Items can be:
       - ``('name', ELFDissectData)`` - directly embed another struct
       - ``('name', 'I')`` - simple data types; second item for struct.unpack
       - ``('name', 'I', None)`` - field to ignore
       - ``('name', 'P', str)`` - pointer to string
       - ``('name', 'P', ELFDissectData)`` - pointer to another struct
       ``P`` is added as unpack format for pointers (sized appropriately for
       the ELF file.)
       Refer to tiabwarfo.py for extracting this from ``pahole``.
       TBD: replace tuples with a class.
    .. attribute:: fieldrename
       Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
    '''
    class Pointer(object):
        '''
        Quick wrapper for pointers to further structs
        This is just here to avoid going into infinite loops when loading
        structs that have pointers to each other (e.g. struct xref <-->
        struct xrefdata.)  The pointer destination is only instantiated when
        actually accessed.
        '''
        def __init__(self, cls, ptr):
            self.cls = cls
            self.ptr = ptr
        def __repr__(self):
            return '<Pointer:%s %r>' % (self.cls.__name__, self.ptr)
        def __call__(self):
            '''
            Instantiate the destination struct, or None for a NULL pointer
            '''
            if isinstance(self.ptr, ELFNull):
                return None
            return self.cls(self.ptr)
    def __new__(cls, dataptr, parent = None, replace = None):
        # NULL / unresolved pointers have no section to keep a cache in
        if dataptr._dstsect is None:
            return super().__new__(cls)
        # reuse an instance already registered for this (class, pointer)
        # in the destination section's cache, otherwise register a new one
        obj = dataptr._dstsect._pointers.get((cls, dataptr))
        if obj is not None:
            return obj
        obj = super().__new__(cls)
        dataptr._dstsect._pointers[(cls, dataptr)] = obj
        return obj
    # struct format characters whose size depends on the ELF word size;
    # rewritten to fixed-size equivalents in _preproc_structspec
    replacements = 'lLnN'
    @classmethod
    def _preproc_structspec(cls, elfclass, spec):
        '''
        Turn one field spec into a struct format for the given ELF class
        :param elfclass: 32 or 64
        :param spec:     struct format string, or a class with a
            ``calcsize(elfclass)`` method (embedded struct), which becomes an
            opaque ``<size>s`` blob
        :returns:        format string with word-size dependent characters
            replaced by ``i``/``I`` (32) or ``q``/``Q`` (64)
        '''
        if hasattr(spec, 'calcsize'):
            spec = '%ds' % (spec.calcsize(elfclass),)
        if elfclass == 32:
            repl = ['i', 'I']
        else:
            repl = ['q', 'Q']
        for c in cls.replacements:
            spec = spec.replace(c, repl[int(c.isupper())])
        return spec
    @classmethod
    def _setup_efields(cls):
        '''
        Precompute per-ELF-class field lists (``_efields``) and total sizes
        (``_esize``) from ``fields``
        '''
        cls._efields = {}
        cls._esize = {}
        for elfclass in [32, 64]:
            ptrtype = 'I' if elfclass == 32 else 'Q'
            cls._efields[elfclass] = []
            size = 0
            for f in cls.fields:
                newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:]
                cls._efields[elfclass].append(newf)
                # size pointers for the ELF class rather than for the host,
                # and use standard sizes ('=') as the unpack in __init__ does
                size += struct.calcsize('=' + newf[1].replace('P', ptrtype))
            cls._esize[elfclass] = size
    def __init__(self, dataptr, parent = None, replace = None):
        if not hasattr(self.__class__, '_efields'):
            self._setup_efields()
        self._fdata = None
        self._data = dataptr
        self._parent = parent
        self.symname = dataptr.symname
        # nothing to decode behind a NULL or unresolved pointer
        if isinstance(dataptr, ELFNull) or isinstance(dataptr, ELFUnresolved):
            self._fdata = {}
            return
        self._elfsect = dataptr._dstsect
        self.elfclass = self._elfsect._elffile.elfclass
        self.offset = dataptr._dstoffs
        pspecl = [f[1] for f in self._efields[self.elfclass]]
        # need to correlate output from struct.unpack with extra metadata
        # about the particular fields, so note down byte offsets (in locs)
        # and tuple indices of pointers (in ptrs)
        #
        # all sizes are calculated with the same byte order prefix that is
        # used for unpacking below; without a prefix struct.calcsize applies
        # the host's native alignment, which struct.unpack with a prefix
        # does not, and the two would disagree on the data length
        endian = self._elfsect.endian
        pspec = ''
        locs = {}
        ptrs = set()
        for idx, spec in enumerate(pspecl):
            if spec == 'P':
                ptrs.add(idx)
                spec = self._elfsect.ptrtype
            locs[idx] = struct.calcsize(endian + pspec)
            pspec = pspec + spec
        self._total_size = struct.calcsize(endian + pspec)
        def replace_ptrs(v):
            # swap raw pointer values for dereferenced pointer objects
            idx, val = v[0], v[1]
            if idx not in ptrs:
                return val
            return self._elfsect.pointer(self.offset + locs[idx])
        data = dataptr.get_data(self._total_size)
        unpacked = struct.unpack(endian + pspec, data)
        unpacked = list(map(replace_ptrs, enumerate(unpacked)))
        self._fraw = unpacked
        self._fdata = OrderedDict()
        replace = replace or {}
        for i, item in enumerate(unpacked):
            name = self.fields[i][0]
            if name is None:
                continue
            if name in replace:
                self._fdata[name] = replace[name]
                continue
            # directly embedded struct: decode it in place
            if isinstance(self.fields[i][1], type) and issubclass(self.fields[i][1], ELFDissectData):
                dataobj = self.fields[i][1](dataptr.offset(locs[i]), self)
                self._fdata[name] = dataobj
                continue
            if len(self.fields[i]) == 3:
                if self.fields[i][2] == str:
                    self._fdata[name] = item.get_string()
                    continue
                elif self.fields[i][2] is None:
                    pass
                elif issubclass(self.fields[i][2], ELFDissectData):
                    # pointer to another struct; decoded lazily on access
                    cls = self.fields[i][2]
                    dataobj = self.Pointer(cls, item)
                    self._fdata[name] = dataobj
                    continue
            self._fdata[name] = item
    def __getattr__(self, attrname):
        '''
        Expose decoded fields as attributes; Pointer fields are resolved (and
        the result remembered) on first access.
        :raises AttributeError: for anything that is not a decoded field
        '''
        # go through __dict__: reading self._fdata while it is not set would
        # land right back in __getattr__ and recurse without end
        fdata = self.__dict__.get('_fdata')
        if fdata is None or attrname not in fdata:
            raise AttributeError(attrname)
        if isinstance(fdata[attrname], self.Pointer):
            fdata[attrname] = fdata[attrname]()
        return fdata[attrname]
    def __repr__(self):
        if not isinstance(self._data, ELFData):
            return '<%s: %r>' % (self.__class__.__name__, self._data)
        return '<%s: %s>' % (self.__class__.__name__,
                ', '.join(['%s=%r' % t for t in self._fdata.items()]))
    @classmethod
    def calcsize(cls, elfclass):
        '''
        Sum up byte size of this struct
        Wraps struct.calcsize with some extra features.
        :param elfclass: 32 or 64
        :returns:        size in bytes, with pointers sized for ``elfclass``
        '''
        if not hasattr(cls, '_efields'):
            cls._setup_efields()
        pspec = ''.join([f[1] for f in cls._efields[elfclass]])
        ptrtype = 'I' if elfclass == 32 else 'Q'
        pspec = pspec.replace('P', ptrtype)
        # '=' selects standard sizes without host alignment, matching the
        # layout used when actually unpacking the struct
        return struct.calcsize('=' + pspec)
 class ELFDissectUnion(ELFDissectData):
    '''
    Decode multiple structs in the same place.
    Not currently used (and hence not tested.)  Worked at some point but not
    needed anymore and may be borked now.  Remove this comment when using.
    Subclasses give a ``members`` class attribute, a list of
    ``(name, ELFDissectData subclass)`` tuples; each member is decoded from
    the same data pointer and set as an attribute under its name.
    :param dataptr: data pointer handed to every member class
    :param parent:  handed through to every member class
    '''
    def __init__(self, dataptr, parent = None):
        self._dataptr = dataptr
        # the inherited __len__ (boolean evaluation) reads self._data, so
        # provide it in addition to _dataptr
        self._data = dataptr
        self._parent = parent
        # instance-level list of decoded members; the class-level "members"
        # definition is still reached through self.__class__
        self.members = []
        for name, membercls in self.__class__.members:
            item = membercls(dataptr, parent)
            self.members.append(item)
            setattr(self, name, item)
    def __repr__(self):
        return '<%s: %s>' % (self.__class__.__name__, ', '.join([repr(i) for i in self.members]))
    @classmethod
    def calcsize(cls, elfclass):
        '''
        Byte size of the union, i.e. the size of its largest member
        '''
        return max(member.calcsize(elfclass) for name, member in cls.members)
 #
 # wrappers for spans of ELF data
 #
 class ELFSubset(object):
    '''
    Common abstract base for section-level and file-level access.
    The methods here rely on ``name``, ``_obj``, ``_elffile``, ``ptrtype``,
    ``endian`` and ``_wrap_data()`` being provided by the subclass.
    '''
    def __init__(self):
        super().__init__()
        # lookup table used by ELFDissectStruct.__new__, keyed by
        # (struct class, data pointer); values are only weakly referenced
        self._pointers = WeakValueDictionary()
    def __hash__(self):
        return hash(self.name)
    def __getitem__(self, k):
        '''
        Read data from slice
        Subscript **must** be a slice; a simple index will not return a byte
        but rather throw an exception.  Valid slice syntaxes are defined by
        the C module:
        - `this[123:456]` - extract specific range
        - `this[123:str]` - extract until null byte.  The slice stop value is
            the `str` type (or, technically, `unicode`.)
        '''
        return self._obj[k]
    def getreloc(self, offset):
        '''
        Check for a relocation record at the specified offset.
        '''
        return self._obj.getreloc(offset)
    def iter_data(self, scls, slice_ = slice(None)):
        '''
        Assume an array of structs present at a particular slice and decode
        :param scls:   ELFDissectData subclass for the struct
        :param slice_: optional range specification; a negative stop counts
            back from the end of the data
        :returns:      generator of ``scls`` instances, one per array item
        '''
        size = scls.calcsize(self._elffile.elfclass)
        offset = slice_.start or 0
        stop = slice_.stop or self._obj.len
        if stop < 0:
            # stop is negative here, so it has to be added to the length;
            # subtracting it would move the end past the available data
            stop = self._obj.len + stop
        while offset < stop:
            yield scls(ELFData(self, offset, size))
            offset += size
    def pointer(self, offset):
        '''
        Try to dereference a pointer value
        This checks whether there's a relocation at the given offset and
        uses that;  otherwise (e.g. in a non-PIE executable where the pointer
        is already resolved by the linker) the data at the location is used.
        :param offset: byte offset from beginning of section,
            or virtual address in file
        :returns:      ELFData wrapping pointed-to object; ELFUnresolved for
            a relocation against an unresolved symbol, ELFNull for NULL
        '''
        ptrsize = struct.calcsize(self.ptrtype)
        data = struct.unpack(self.endian + self.ptrtype, self[offset:offset + ptrsize])[0]
        reloc = self.getreloc(offset)
        dstsect = None
        if reloc:
            # section won't be available in whole-file operation
            dstsect = reloc.getsection(data)
            addend = reloc.r_addend
            if reloc.relative:
                # old-style ELF REL instead of RELA, not well-tested
                addend += data
            if reloc.unresolved and reloc.symvalid:
                return ELFUnresolved(reloc.symname, addend)
            elif reloc.symvalid:
                data = addend + reloc.st_value
            else:
                data = addend
        # 0 could technically be a valid pointer for a shared library,
        # since libraries may use 0 as default virtual start address (it'll
        # be adjusted on loading)
        # That said, if the library starts at 0, that's where the ELF header
        # would be so it's still an invalid pointer.
        if data == 0 and dstsect is None:
            return ELFNull()
        # wrap_data is different between file & section
        return self._wrap_data(data, dstsect)
 class ELFDissectSection(ELFSubset):
    '''
    One ELF section (``.text``, ``.data``, ...) and access to its contents.
    Pointer type and byte order are taken over from the file wrapper.
    :param elfwrap: ELFDissectFile wrapper for the file
    :param idx:     section index in section header table
    :param section: section object from C module
    '''
    def __init__(self, elfwrap, idx, section):
        super().__init__()
        self._elfwrap = elfwrap
        self._elffile = elfwrap._elffile
        self._idx = idx
        # the section object serves both as "_section" and as the generic
        # "_obj" that the ELFSubset methods operate on
        self._obj = section
        self._section = section
        self.name = section.name
        self.ptrtype = elfwrap.ptrtype
        self.endian = elfwrap.endian
    def _wrap_data(self, data, dstsect):
        '''
        Wrap a pointer value as ELFData relative to its destination section.
        :param data:    pointer value
        :param dstsect: destination section (C module object), or None to
            look the section up by address
        '''
        target = dstsect
        if target is None:
            target = self._elfwrap._elffile.get_section_addr(data)
        # convert to an offset from the section's sh_addr
        relative = data - target.sh_addr
        wrapped = self._elfwrap.get_section(target.idx)
        return ELFData(wrapped, relative, None)
 class ELFDissectFile(ELFSubset):
    '''
    Access the contents of an ELF file.
    Note that offsets for array subscript and relocation/pointer access are
    based on the file's virtual address space and are NOT offsets to the
    start of the file on disk!
    (Shared libraries frequently have a virtual address space starting at 0,
    but non-PIE executables have an architecture specific default loading
    address like 0x400000 on x86.)
    :param filename: ELF file to open
    '''
    def __init__(self, filename):
        super().__init__()
        self.name = filename
        elffile = ELFFile(filename)
        self._elffile = self._obj = elffile
        # section wrappers created so far, keyed by section index
        self._sections = {}
        if elffile.elfclass == 32:
            self.ptrtype = 'I'
        else:
            self.ptrtype = 'Q'
        if elffile.bigendian:
            self.endian = '>'
        else:
            self.endian = '<'
    @property
    def _elfwrap(self):
        # the file is its own wrapper
        return self
    def _wrap_data(self, data, dstsect):
        '''
        Wrap a pointer value as ELFData relative to the whole file; the
        destination section is not used here.
        '''
        return ELFData(self, data, None)
    def get_section(self, secname):
        '''
        Look up section by name or index
        :param secname: section name, or (if an int) section index
        :returns: ELFDissectSection for the section (the same object on
            repeated lookups), or None if the C module returned None
        '''
        if isinstance(secname, int):
            found = self._elffile.get_section_idx(secname)
        else:
            found = self._elffile.get_section(secname)
        if found is None:
            return None
        key = found.idx
        wrapper = self._sections.get(key)
        if wrapper is None:
            wrapper = ELFDissectSection(self, key, found)
            self._sections[key] = wrapper
        return wrapper
--- a/python/clippy/uidhash.py
+++ b/python/clippy/uidhash.py
@ -0,0 +1,71 @@
 # xref unique ID hash calculation
 #
 # Copyright (C) 2020  David Lamparter for NetDEF, Inc.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 # more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; see the file COPYING; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 import struct
 from hashlib import sha256
 def bititer(data, bits, startbit = True):
    '''
    just iterate the individual bits out from a bytes object
    if startbit is True, an '1' bit is inserted at the very beginning
    goes <bits> at a time, starts at LSB.
    :param data:     bytes, bytearray, list or other iterable of byte values;
        it is copied and never modified
    :param bits:     number of bits per yielded value (used with 5 by
        base32c; values up to 8 never run out of input in the middle of
        a group)
    :param startbit: insert a '1' bit as top bit of the first yielded value
    :returns:        generator of ints, each ``bits`` wide.  Iteration ends
        once all input bytes are consumed; bits left over at that point are
        not yielded.
    '''
    # work on a private copy with a read position instead of popping items
    # off the caller's object (which also fails for immutable bytes)
    data = list(data)
    pos, total = 0, len(data)
    mask = (1 << bits) - 1
    bitavail, v = 0, 0
    if startbit and total > 0:
        v = data[0]
        pos = 1
        # first value: bits - 1 data bits with the start bit on top
        yield (v & mask) | (1 << (bits - 1))
        bitavail = 9 - bits
        v >>= bits - 1
    while pos < total:
        while bitavail < bits:
            v |= data[pos] << bitavail
            pos += 1
            bitavail += 8
        yield v & mask
        bitavail -= bits
        v >>= bits
 def base32c(data):
    '''
    Encode data as two dash-separated groups of five characters from the
    Crockford base32 alphabet.
    :param data: str (each character's code point is used as a byte value)
        or an iterable of byte values
    :returns:    at most 10 characters of encoded text, with a dash before
        the sixth one
    '''
    alphabet = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
    if type(data) == str:
        octets = [ord(ch) for ch in data]
    else:
        octets = list(data)
    pieces = []
    for pos, quintet in enumerate(bititer(octets, 5)):
        if pos == 10:
            # only the first ten 5-bit groups are used
            break
        if pos == 5:
            pieces.append('-')
        pieces.append(alphabet[quintet])
    return ''.join(pieces)
 def uidhash(filename, hashstr, hashu32a, hashu32b):
    '''
    Compute the xref unique ID used in FRRouting.
    The ID is the base32c encoding of a SHA-256 digest over the last two
    path components of ``filename``, then ``hashstr`` (both as UTF-8), then
    the two integers packed as big-endian 32-bit unsigned values.
    :param filename: source file path, components separated by ``/``
    :param hashstr:  string to mix into the hash
    :param hashu32a: first 32-bit unsigned integer
    :param hashu32b: second 32-bit unsigned integer
    :returns:        the ID as text
    '''
    tail = filename.split('/')[-2:]
    hasher = sha256()
    hasher.update('/'.join(tail).encode('UTF-8'))
    hasher.update(hashstr.encode('UTF-8'))
    hasher.update(struct.pack('>II', hashu32a, hashu32b))
    return base32c(hasher.digest())
--- a/python/runtests.py
+++ b/python/runtests.py
@ -0,0 +1,14 @@
 import pytest
 import sys
 import os
 # Probe for the _clippy C extension module before doing anything else; if
 # it cannot be imported, print a hint on stderr and exit with status 1.
 try:
    import _clippy
 except ImportError:
    sys.stderr.write('''these tests need to be run with the _clippy C extension
 module available.  Try running "clippy runtests.py ...".
 ''')
    sys.exit(1)
 # Change into the directory this script lives in, then pass the command
 # line arguments on to pytest and exit with whatever pytest.main() returns.
 os.chdir(os.path.dirname(os.path.abspath(__file__)))
 raise SystemExit(pytest.main(sys.argv[1:]))
--- a/python/test_xrelfo.py
+++ b/python/test_xrelfo.py
@ -0,0 +1,65 @@
 # some basic tests for xrelfo & the python ELF machinery
 #
 # Copyright (C) 2020  David Lamparter for NetDEF, Inc.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 # more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; see the file COPYING; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 import sys
 import os
 import pytest
 from pprint import pprint
 root = os.path.dirname(os.path.dirname(__file__))
 sys.path.append(os.path.join(root, 'python'))
 import xrelfo
 from clippy import elf, uidhash
 def test_uidhash():
    '''
    uidhash() must reproduce a known ID for a fixed set of inputs
    '''
    expected = 'H7KJB-67TBH'
    actual = uidhash.uidhash("lib/test_xref.c", "logging call", 3, 0)
    assert actual == expected
 def test_xrelfo_other():
    '''
    An XrefPtr built on a NULL or unresolved pointer can be printed, but
    accessing its ``xref`` field raises AttributeError
    '''
    placeholders = [
        elf.ELFNull(),
        elf.ELFUnresolved('somesym', 0),
    ]
    for placeholder in placeholders:
        dissect = xrelfo.XrefPtr(placeholder)
        print(repr(dissect))
        with pytest.raises(AttributeError):
            dissect.xref
 def test_xrelfo_obj():
    '''
    Load an object file (zclient.o): slicing the returned wrapper must raise
    ELFAccessError, and the first xref found is printed
    '''
    loader = xrelfo.Xrelfo()
    objpath = os.path.join(root, 'lib/.libs/zclient.o')
    edf = loader.load_elf(objpath, 'zclient.lo')
    xrefs = loader._xrefs
    with pytest.raises(elf.ELFAccessError):
        edf[0:4]
    first = xrefs[0]
    pprint(first)
    pprint(first._data)
 def test_xrelfo_bin():
    '''
    Load a shared library (libfrr.so): the first four bytes read through the
    returned wrapper must be the ELF magic, and the first xref found is
    printed
    '''
    loader = xrelfo.Xrelfo()
    libpath = os.path.join(root, 'lib/.libs/libfrr.so')
    edf = loader.load_elf(libpath, 'libfrr.la')
    xrefs = loader._xrefs
    magic = edf[0:4]
    assert magic == b'\x7fELF'
    first = xrefs[0]
    pprint(first)
    pprint(first._data)
--- a/python/tiabwarfo.py
+++ b/python/tiabwarfo.py
@ -0,0 +1,195 @@
 # FRR DWARF structure definition extractor
 #
 # Copyright (C) 2020  David Lamparter for NetDEF, Inc.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 # more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; see the file COPYING; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 import sys
 import os
 import subprocess
 import re
 import argparse
 import subprocess
 import json
 # names of the C structs whose layout extract() requests from pahole
 # (joined with commas and passed to its -C option)
 structs = ['xref', 'xref_logmsg', 'xref_threadsched', 'xref_install_element', 'xrefdata', 'xrefdata_logmsg', 'cmd_element']
 def extract(filename='lib/.libs/libfrr.so'):
    '''
    Run "pahole" on an ELF file and turn its struct listings into a dict
    that can be dumped as JSON.
    pahole is invoked for the structs named in ``structs``.  Sample of the
    output being parsed:
    $ pahole -C xref lib/.libs/libfrr.so
    struct xref {
        struct xrefdata *          xrefdata;             /*     0     8 */
        enum xref_type             type;                 /*     8     4 */
        int                        line;                 /*    12     4 */
        const char  *              file;                 /*    16     8 */
        const char  *              func;                 /*    24     8 */
        /* size: 32, cachelines: 1, members: 5 */
        /* last cacheline: 32 bytes */
    };
    :param filename: ELF file to run pahole on
    :returns: ``{structname: {'fields': [...]}}``; each field is a dict with
        ``name``, ``type``, ``offset`` and ``size``, plus ``array`` (element
        count) for array members
    :raises ValueError: on a line inside a struct that is neither blank,
        a comment, nor a recognized field
    '''
    raw = subprocess.check_output(['pahole', '-C', ','.join(structs), filename])
    text = raw.decode('UTF-8')
    struct_re = re.compile(r'^struct ([^ ]+) \{([^\}]+)};', flags=re.M | re.S)
    field_re = re.compile(r'^\s*(?P<type>[^;\(]+)\s+(?P<name>[^;\[\]]+)(?:\[(?P<array>\d+)\])?;\s*\/\*(?P<comment>.*)\*\/\s*$')
    comment_re = re.compile(r'^\s*\/\*.*\*\/\s*$')
    result = {}
    for structname, body in struct_re.findall(text):
        members = result.setdefault(structname, {}).setdefault('fields', [])
        for line in body.strip().splitlines():
            # blank lines and pure comment lines carry no field
            if line.strip() == '' or comment_re.match(line) is not None:
                continue
            found = field_re.match(line)
            if found is None:
                raise ValueError('cannot process line: %s' % line)
            # the trailing comment holds "<offset> <size>"
            offs, size = found.group('comment').strip().split()
            offs, size = int(offs), int(size)
            typ_ = found.group('type').strip()
            fieldname = found.group('name')
            if fieldname.startswith('(*'):
                # function pointer
                typ_ = typ_ + ' *'
                fieldname = fieldname[2:].split(')')[0]
            entry = {
                'name': fieldname,
                'type': typ_,
                'offset': offs,
                'size': size,
            }
            if found.group('array'):
                entry['array'] = int(found.group('array'))
            members.append(entry)
    return result
 class FieldApplicator(object):
    '''
    Fill ELFDissectStruct fields list from pahole/JSON
    Uses the JSON file created by the above code to fill in the struct fields
    in subclasses of ELFDissectStruct.
    Register classes with :meth:`add`, then call the instance to set the
    ``fields`` attribute on each of them.
    :param data: dict as produced by ``extract()`` / loaded from the JSON file
    '''
    # only what we really need.  add more as needed.
    packtypes = {
        'int': 'i',
        'uint8_t': 'B',
        'uint16_t': 'H',
        'uint32_t': 'I',
        'char': 's',
    }
    def __init__(self, data):
        self.data = data
        self.classes = []
        self.clsmap = {}
    def add(self, cls):
        '''
        Register a class; its ``struct`` attribute names the C struct
        '''
        self.classes.append(cls)
        self.clsmap[cls.struct] = cls
    def resolve(self, cls):
        '''
        Build the field list for one class and store it in ``cls.fields``
        Gaps between consecutive fields are filled with ``_pad`` entries.
        An optional ``fieldrename`` dict on the class renames fields.
        :raises ValueError: if a field's type cannot be mapped
        '''
        out = []
        offset = 0
        fieldrename = getattr(cls, 'fieldrename', {})
        def mkname(n):
            return (fieldrename.get(n, n),)
        for field in self.data[cls.struct]['fields']:
            typs = field['type'].split()
            typs = [i for i in typs if i not in ['const']]
            if field['offset'] != offset:
                assert offset < field['offset']
                out.append(('_pad', '%ds' % (field['offset'] - offset,)))
            # pretty hacky C types handling, but covers what we need
            ptrlevel = 0
            while typs[-1] == '*':
                typs.pop(-1)
                ptrlevel += 1
            if ptrlevel > 0:
                # pointers are opaque unless they are a single-level
                # "char *" or point to a registered struct
                packtype = ('P', None)
                if ptrlevel == 1:
                    if typs[0] == 'char':
                        packtype = ('P', str)
                    elif typs[0] == 'struct' and typs[1] in self.clsmap:
                        packtype = ('P', self.clsmap[typs[1]])
            elif typs[0] == 'enum':
                packtype = ('I',)
            elif typs[0] in self.packtypes:
                packtype = (self.packtypes[typs[0]],)
            elif typs[0] == 'struct':
                if typs[1] in self.clsmap:
                    packtype = (self.clsmap[typs[1]],)
                else:
                    # unregistered struct: keep as opaque blob of its size
                    packtype = ('%ds' % field['size'],)
            else:
                # arguments in the order the message names them:
                # field name first, then struct name
                raise ValueError('cannot decode field %s in struct %s (%s)' % (
                        field['name'], cls.struct, field['type']))
            if 'array' in field and typs[0] == 'char':
                # char array becomes one fixed-length string field
                packtype = ('%ds' % field['array'],)
                out.append(mkname(field['name']) + packtype)
            elif 'array' in field:
                # other arrays become one field per element: name_0, name_1...
                for i in range(0, field['array']):
                    out.append(mkname('%s_%d' % (field['name'], i)) + packtype)
            else:
                out.append(mkname(field['name']) + packtype)
            offset = field['offset'] + field['size']
        cls.fields = out
    def __call__(self):
        '''
        Resolve all registered classes
        '''
        for cls in self.classes:
            self.resolve(cls)
 def main():
    '''
    Command line entry point: run extract() on the input ELF file (-i) and
    write the result as JSON to the output file (-o).  The JSON goes to
    "<output>.tmp" first and is renamed into place afterwards.
    '''
    parser = argparse.ArgumentParser(description = 'FRR DWARF structure extractor')
    parser.add_argument('-o', dest='output', type=str, help='write JSON output', default='python/xrefstructs.json')
    parser.add_argument('-i', dest='input',  type=str, help='ELF file to read',  default='lib/.libs/libfrr.so')
    opts = parser.parse_args()
    result = extract(opts.input)
    tmpname = opts.output + '.tmp'
    with open(tmpname, 'w') as outfile:
        json.dump(result, outfile, indent=2, sort_keys=True)
    os.rename(tmpname, opts.output)
 if __name__ == '__main__':
    main()
--- a/python/xrefstructs.json
+++ b/python/xrefstructs.json
@ -0,0 +1,190 @@
 {
  "cmd_element": {
    "fields": [
      {
        "name": "string",
        "offset": 0,
        "size": 8,
        "type": "const char  *"
      },
      {
        "name": "doc",
        "offset": 8,
        "size": 8,
        "type": "const char  *"
      },
      {
        "name": "daemon",
        "offset": 16,
        "size": 4,
        "type": "int"
      },
      {
        "name": "attr",
        "offset": 20,
        "size": 1,
        "type": "uint8_t"
      },
      {
        "name": "func",
        "offset": 24,
        "size": 8,
        "type": "int *"
      },
      {
        "name": "name",
        "offset": 32,
        "size": 8,
        "type": "const char  *"
      },
      {
        "name": "xref",
        "offset": 40,
        "size": 32,
        "type": "struct xref"
      }
    ]
  },
  "xref": {
    "fields": [
      {
        "name": "xrefdata",
        "offset": 0,
        "size": 8,
        "type": "struct xrefdata *"
      },
      {
        "name": "type",
        "offset": 8,
        "size": 4,
        "type": "enum xref_type"
      },
      {
        "name": "line",
        "offset": 12,
        "size": 4,
        "type": "int"
      },
      {
        "name": "file",
        "offset": 16,
        "size": 8,
        "type": "const char  *"
      },
      {
        "name": "func",
        "offset": 24,
        "size": 8,
        "type": "const char  *"
      }
    ]
  },
  "xref_install_element": {
    "fields": [
      {
        "name": "xref",
        "offset": 0,
        "size": 32,
        "type": "struct xref"
      },
      {
        "name": "cmd_element",
        "offset": 32,
        "size": 8,
        "type": "const struct cmd_element  *"
      },
      {
        "name": "node_type",
        "offset": 40,
        "size": 4,
        "type": "enum node_type"
      }
    ]
  },
  "xref_logmsg": {
    "fields": [
      {
        "name": "xref",
        "offset": 0,
        "size": 32,
        "type": "struct xref"
      },
      {
        "name": "fmtstring",
        "offset": 32,
        "size": 8,
        "type": "const char  *"
      },
      {
        "name": "priority",
        "offset": 40,
        "size": 4,
        "type": "uint32_t"
      },
      {
        "name": "ec",
        "offset": 44,
        "size": 4,
        "type": "uint32_t"
      }
    ]
  },
  "xref_threadsched": {
    "fields": [
      {
        "name": "xref",
        "offset": 0,
        "size": 32,
        "type": "struct xref"
      },
      {
        "name": "funcname",
        "offset": 32,
        "size": 8,
        "type": "const char  *"
      },
      {
        "name": "dest",
        "offset": 40,
        "size": 8,
        "type": "const char  *"
      },
      {
        "name": "thread_type",
        "offset": 48,
        "size": 4,
        "type": "uint32_t"
      }
    ]
  },
  "xrefdata": {
    "fields": [
      {
        "name": "xref",
        "offset": 0,
        "size": 8,
        "type": "const struct xref  *"
      },
      {
        "array": 16,
        "name": "uid",
        "offset": 8,
        "size": 16,
        "type": "char"
      },
      {
        "name": "hashstr",
        "offset": 24,
        "size": 8,
        "type": "const char  *"
      },
      {
        "array": 2,
        "name": "hashu32",
        "offset": 32,
        "size": 8,
        "type": "uint32_t"
      }
    ]
  }
 }
--- a/python/xrelfo.py
+++ b/python/xrelfo.py
@ -0,0 +1,397 @@
 # FRR ELF xref extractor
 #
 # Copyright (C) 2020  David Lamparter for NetDEF, Inc.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 # more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; see the file COPYING; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 import sys
 import os
 import struct
 import re
 import traceback
 import json
 import argparse
 from clippy.uidhash import uidhash
 from clippy.elf import *
 from clippy import frr_top_src
 from tiabwarfo import FieldApplicator
 # Load the struct layout descriptions from python/xrefstructs.json below the
 # FRR source root (frr_top_src); the result is handed to FieldApplicator
 # further down in this file.
 try:
    with open(os.path.join(frr_top_src, 'python', 'xrefstructs.json'), 'r') as fd:
        xrefstructs = json.load(fd)
 except FileNotFoundError:
    # without the layouts nothing can be set up: explain and exit with status 1
    sys.stderr.write('''
 The "xrefstructs.json" file (created by running tiabwarfo.py with the pahole
 tool available) could not be found.  It should be included with the sources.
 ''')
    sys.exit(1)
 # constants, need to be kept in sync manually...
 # (used below as keys of Xref.containers, i.e. compared against Xref.typ)
 XREFT_THREADSCHED = 0x100
 XREFT_LOGMSG = 0x200
 XREFT_DEFUN = 0x300
 XREFT_INSTALL_ELEMENT = 0x301
 # LOG_*
 # priovals is looked up with (priority & 0x30) in XrefLogmsg.dump(); it is
 # empty here, so that lookup falls back to its ' ' default
 priovals = {}
 # one-character labels, indexed with (priority & 7) in XrefLogmsg.dump()
 prios = ['0', '1', '2', 'E', 'W', 'N', 'I', 'D']
 class XrelfoJson:
    """Do-nothing defaults for the dump() / check() / to_dict() interface.

    The container classes in this file list this class as a base and
    override whichever of the three methods they need.
    """

    def dump(self):
        """Default: print nothing, return None."""
        return None

    def check(self, wopt):
        """Default: a generator that produces no check results."""
        yield from ()

    def to_dict(self, refs):
        """Default: add nothing to *refs*, return None."""
        return None
 class Xref(ELFDissectStruct, XrelfoJson):
    """Dissector for 'struct xref', the part shared by all xref kinds.

    The struct's 'type' field is listed in fieldrename under 'typ' and is
    read as self.typ throughout this class.
    """
    struct = 'xref'
    fieldrename = {'type': 'typ'}
    # typ value -> class of the struct embedding this xref; filled in by the
    # module-level registrations following each container class
    containers = {}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._container = None
        # give the associated xrefdata (if any) a way back to this xref
        if self.xrefdata:
            self.xrefdata.ref_from(self, self.typ)

    def container(self):
        """Return the enclosing container object, or None if typ is unknown.

        The container is looked up through container_of() on first use and
        cached in self._container afterwards.
        """
        if self._container is None and self.typ in self.containers:
            self._container = self.container_of(self.containers[self.typ], 'xref')
        return self._container

    def check(self, *args, **kwargs):
        """Forward to the cached container's check(); yield nothing without one.

        Only looks at the cache: container() must have been called before
        for any results to appear.
        """
        if not self._container:
            return
        yield from self._container.check(*args, **kwargs)
 class Xrefdata(ELFDissectStruct):
    """Dissector for 'struct xrefdata'; computes the uid on demand."""
    struct = 'xrefdata'
    # uid is all zeroes in the data loaded from ELF
    fieldrename = {'uid': '_uid'}

    def ref_from(self, xref, typ):
        """Remember the Xref that points at this xrefdata (typ is unused)."""
        self.xref = xref

    @property
    def uid(self):
        """uidhash() of the owning xref's file plus the hash fields, or None
        when hashstr is None."""
        if self.hashstr is not None:
            return uidhash(self.xref.file, self.hashstr, self.hashu32_0, self.hashu32_1)
        return None
 class XrefPtr(ELFDissectStruct):
    """One entry of the xref pointer array walked by Xrelfo.load_elf().

    Declares its single field by hand instead of going through
    FieldApplicator; load_elf() reads it as ptr.xref and treats None as a
    NULL entry.
    """
    fields = [
        # (name, pack type, target class)
        ('xref', 'P', Xref),
    ]
 class XrefThreadSched(ELFDissectStruct, XrelfoJson):
    """Container for 'xref_threadsched' entries; adds nothing beyond its
    base classes."""
    struct = 'xref_threadsched'
 # register as the container class for xrefs whose typ is XREFT_THREADSCHED
 Xref.containers[XREFT_THREADSCHED] = XrefThreadSched
 class XrefLogmsg(ELFDissectStruct, XrelfoJson):
    """Container for 'xref_logmsg' entries: one log message call site."""
    struct = 'xref_logmsg'

    def _warn_fmt(self, text):
        """Yield one ((file, line), message) tuple describing a finding."""
        location = (self.xref.file, self.xref.line)
        message = '%s:%d: %s (in %s())\n' % (self.xref.file, self.xref.line, text, self.xref.func)
        yield (location, message)

    # (pattern, complaint) pairs; each pattern has exactly one capture group
    # so that split() puts the offending text at the odd indices
    regexes = [
        (re.compile(r'([\n\t]+)'), 'error: log message contains tab or newline'),
    #    (re.compile(r'^(\s+)'),   'warning: log message starts with whitespace'),
        (re.compile(r'^((?:warn(?:ing)?|error):\s*)', re.I), 'warning: log message starts with severity'),
    ]

    def check(self, wopt):
        """Yield findings for the format string if wopt.Wlog_format is set.

        When stderr is a terminal the matched parts of the format string are
        highlighted with ANSI colors; otherwise the plain repr is quoted.
        """
        if not wopt.Wlog_format:
            return
        for pattern, complaint in self.regexes:
            if pattern.search(self.fmtstring) is None:
                continue
            if sys.stderr.isatty():
                pieces = pattern.split(self.fmtstring)
                excerpt = ''.join(
                    '\033[41;37;1m%s\033[m' % repr(piece)[1:-1] if idx % 2 == 1
                    else repr(piece)[1:-1]
                    for idx, piece in enumerate(pieces))
            else:
                excerpt = repr(self.fmtstring)[1:-1]
            yield from self._warn_fmt('%s: "%s"' % (complaint, excerpt))

    def dump(self):
        """Print a one-line summary of this log message to stdout."""
        where = '%s:%d %s()' % (self.xref.file, self.xref.line, self.xref.func)
        print('%-60s %s%s %-25s [EC %d] %s' % (
            where,
            prios[self.priority & 7],
            priovals.get(self.priority & 0x30, ' '),
            self.xref.xrefdata.uid, self.ec, self.fmtstring))

    def to_dict(self, xrelfo):
        """Append this message's JSON description to xrelfo['refs'][uid]."""
        jsobj = {key: getattr(self.xref, key) for key in ('file', 'line', 'func')}
        if self.ec != 0:
            jsobj['ec'] = self.ec
        jsobj['fmtstring'] = self.fmtstring
        jsobj['priority'] = self.priority & 7
        jsobj['type'] = 'logmsg'
        jsobj['binary'] = self._elfsect._elfwrap.orig_filename
        # bits 0x10 / 0x20 of priority are reported as named flags
        flags = [name for bit, name in ((0x10, 'errno'), (0x20, 'getaddrinfo'))
                 if self.priority & bit]
        if flags:
            jsobj['flags'] = flags
        xrelfo['refs'].setdefault(self.xref.xrefdata.uid, []).append(jsobj)

 # register as the container class for xrefs whose typ is XREFT_LOGMSG
 Xref.containers[XREFT_LOGMSG] = XrefLogmsg
 class CmdElement(ELFDissectStruct, XrelfoJson):
    """Container for 'cmd_element' entries: one CLI command definition."""
    struct = 'cmd_element'
    # attr value -> name used in the JSON output; None means "leave it out"
    cmd_attrs = { 0: None, 1: 'deprecated', 2: 'hidden'}

    # no __init__ of its own: construction is handled by the base classes

    def to_dict(self, xrelfo):
        """Record this command under xrelfo['cli'][name][binary].

        Sets 'string', 'doc' and 'defun' (file/line/func of the definition);
        'attr' is stored only if it does not map to None, and any 'attr'
        already present is removed in that case.
        """
        per_command = xrelfo['cli'].setdefault(self.name, {})
        entry = per_command.setdefault(self._elfsect._elfwrap.orig_filename, {})

        string, doc = self.string, self.doc
        attr = self.cmd_attrs.get(self.attr, self.attr)

        entry['string'] = string
        entry['doc'] = doc
        if attr is None:
            entry.pop('attr', None)
        else:
            entry['attr'] = attr
        entry['defun'] = {key: getattr(self.xref, key) for key in ('file', 'line', 'func')}

 # register as the container class for xrefs whose typ is XREFT_DEFUN
 Xref.containers[XREFT_DEFUN] = CmdElement
 class XrefInstallElement(ELFDissectStruct, XrelfoJson):
    """Container for 'xref_install_element' entries: one place where a CLI
    command is installed into a node."""
    struct = 'xref_install_element'

    def to_dict(self, xrelfo):
        """Append {'node', 'install'} to the 'nodes' list kept under
        xrelfo['cli'][command name][binary]."""
        per_command = xrelfo['cli'].setdefault(self.cmd_element.name, {})
        entry = per_command.setdefault(self._elfsect._elfwrap.orig_filename, {})
        nodes = entry.setdefault('nodes', [])
        node = self.node_type
        install = {key: getattr(self.xref, key) for key in ('file', 'line', 'func')}
        nodes.append({'node': node, 'install': install})

 # register as the container class for xrefs whose typ is XREFT_INSTALL_ELEMENT
 Xref.containers[XREFT_INSTALL_ELEMENT] = XrefInstallElement
 # shove in field defs
 # FieldApplicator is given the layouts loaded from xrefstructs.json; every
 # dissector class with a `struct` name is registered through add(), and the
 # final call processes all registered classes.
 # NOTE(review): presumably that call is what sets each class's `fields`
 # from its `struct` name -- confirm in tiabwarfo.py.
 fieldapply = FieldApplicator(xrefstructs)
 fieldapply.add(Xref)
 fieldapply.add(Xrefdata)
 fieldapply.add(XrefLogmsg)
 fieldapply.add(XrefThreadSched)
 fieldapply.add(CmdElement)
 fieldapply.add(XrefInstallElement)
 fieldapply()
 class Xrelfo(dict):
    """Accumulates xref information from ELF files and JSON dumps.

    The object is itself the JSON-serializable result, a dict with two keys:
    'refs' maps a uid to a list of reference dicts and 'cli' maps a command
    name to a dict keyed by binary.  Xref objects read from ELF files are
    additionally kept in self._xrefs so that check() can visit them later.
    """

    def __init__(self):
        super().__init__({
            'refs': {},
            'cli': {},
        })
        self._xrefs = []

    @staticmethod
    def _resolve_libtool(filename):
        """Return the file a libtool .la/.lo text file refers to.

        The first 'pic_object' assignment gives a path relative to the
        libtool file; the first 'library_names' assignment gives a list of
        names whose first entry is looked for in the '.libs' subdirectory.

        Raises ValueError if neither variable is present, or if
        library_names is empty (a library without a shared object, for
        which the lookup used to fail with a bare IndexError).
        """
        basedir = os.path.dirname(filename)
        with open(filename, 'r') as fd:
            for line in fd:
                line = line.strip()
                if line.startswith('#') or line == '' or '=' not in line:
                    continue
                var, val = line.split('=', 1)
                if var not in ['library_names', 'pic_object']:
                    continue
                # drop the surrounding quotes of the shell-style assignment
                if val.startswith("'") or val.startswith('"'):
                    val = val[1:-1]
                if var == 'pic_object':
                    return os.path.join(basedir, val)
                names = val.split()
                if not names:
                    raise ValueError('libtool file "%s" does not name a shared library' % filename)
                return os.path.join(basedir, '.libs', names[0])
        raise ValueError('could not process libtool file "%s"' % filename)

    def load_file(self, filename):
        """Load xrefs from *filename*, whatever supported kind of file it is.

        Names ending in .la/.lo are read as libtool files and replaced by
        the file they point to.  The file is then classified by its first
        bytes: ELF files go to load_elf(), files starting with '{' go to
        load_json(), and for '#!' scripts (presumably libtool wrapper
        scripts) the file of the same name in the '.libs' subdirectory is
        tried instead.

        Raises ValueError if the file type cannot be determined or a libtool
        file cannot be resolved; errors from open() propagate unchanged.
        """
        orig_filename = filename
        if filename.endswith('.la') or filename.endswith('.lo'):
            filename = self._resolve_libtool(filename)

        while True:
            with open(filename, 'rb') as fd:
                hdr = fd.read(4)

            if hdr == b'\x7fELF':
                self.load_elf(filename, orig_filename)
                return

            if hdr[:2] == b'#!':
                path, name = os.path.split(filename)
                filename = os.path.join(path, '.libs', name)
                continue

            if hdr[:1] == b'{':
                with open(filename, 'r') as fd:
                    self.load_json(fd)
                return

            raise ValueError('cannot determine file type for %s' % (filename))

    def load_elf(self, filename, orig_filename):
        """Read all xrefs from the ELF file *filename*; return the ELFDissectFile.

        *orig_filename* is stored on the ELFDissectFile as orig_filename;
        the container classes report it as the binary name.  The xref
        pointer array is located through the 'FRRouting'/'XREF' note if
        there is one, otherwise through the 'xref_array' section.  Each
        non-NULL xref is appended to self._xrefs and, if a container class
        is registered for its type, that container's to_dict() is called
        with this object.

        Raises ValueError if the file has neither the note nor the section.
        """
        edf = ELFDissectFile(filename)
        edf.orig_filename = orig_filename

        note = edf._elffile.find_note('FRRouting', 'XREF')
        if note is not None:
            endian = '>' if edf._elffile.bigendian else '<'
            mem = edf._elffile[note]
            # the note holds two words of the ELF class's pointer size
            if edf._elffile.elfclass == 64:
                fmt, wordsize = 'QQ', 8
            else:
                fmt, wordsize = 'II', 4
            start, end = struct.unpack(endian + fmt, mem)
            # each word is offset by its own position: the first sits at
            # note.start, the second one word later
            start += note.start
            end += note.start + wordsize

            ptrs = edf.iter_data(XrefPtr, slice(start, end))
        else:
            xrefarray = edf.get_section('xref_array')
            if xrefarray is None:
                raise ValueError('file has neither xref note nor xref_array section')

            ptrs = xrefarray.iter_data(XrefPtr)

        for ptr in ptrs:
            if ptr.xref is None:
                print('NULL xref')
                continue
            self._xrefs.append(ptr.xref)

            container = ptr.xref.container()
            if container is None:
                continue
            container.to_dict(self)

        return edf

    def load_json(self, fd):
        """Merge a JSON dump read from the open file *fd*; return the parsed data.

        'refs' entries are appended per uid unless an equal entry is already
        present; 'cli' entries are merged per command with dict.update(), so
        a binary already present for a command is overwritten.
        """
        data = json.load(fd)
        for uid, items in data['refs'].items():
            myitems = self['refs'].setdefault(uid, [])
            for item in items:
                if item in myitems:
                    continue
                myitems.append(item)

        for cmd, items in data['cli'].items():
            self['cli'].setdefault(cmd, {}).update(items)

        return data

    def check(self, checks):
        """Yield the check() results of every xref loaded from ELF files."""
        for xref in self._xrefs:
            yield from xref.check(checks)
 def main():
    """Command-line entry point: parse the arguments and run _main().

    With --profile, _main() is run under cProfile and the statistics are
    sorted by cumulative time.
    """
    parser = argparse.ArgumentParser(description = 'FRR xref ELF extractor')
    parser.add_argument('-o', dest='output', type=str, help='write JSON output')
    parser.add_argument('--out-by-file',     type=str, help='write by-file JSON output')
    parser.add_argument('-Wlog-format',      action='store_const', const=True)
    parser.add_argument('--profile',         action='store_const', const=True)
    parser.add_argument('binaries', metavar='BINARY', nargs='+', type=str, help='files to read (ELF files or libtool objects)')
    args = parser.parse_args()

    if not args.profile:
        _main(args)
        return

    # imported here so that normal runs do not load the profiler
    import cProfile
    cProfile.runctx('_main(args)', globals(), {'args': args}, sort='cumtime')
 def _main(args):
    """Load all input files, run the checks and write the JSON outputs.

    *args* is the argparse namespace built by main(); it is read for
    .binaries, .output and .out_by_file, and is passed as a whole to the
    xref checks (which look at .Wlog_format).

    A file that fails to load is reported on stderr with its traceback and
    counted; the remaining files are still processed.  If any file failed,
    the process exits with status 1 after the checks and before any output
    file is written.
    """
    errors = 0
    xrelfo = Xrelfo()

    for fn in args.binaries:
        try:
            xrelfo.load_file(fn)
        # Exception rather than a bare except: Ctrl-C (KeyboardInterrupt)
        # and SystemExit must end the run instead of being counted as a
        # per-file error
        except Exception:
            errors += 1
            sys.stderr.write('while processing %s:\n' % (fn))
            traceback.print_exc()

    # the checks run once if the namespace has any attribute starting with 'W'
    if any(option.startswith('W') for option in dir(args)):
        checks = sorted(xrelfo.check(args))
        sys.stderr.write(''.join([c[-1] for c in checks]))

    refs = xrelfo['refs']

    # print (in red) every uid whose entries do not share one format string
    for uid, locs in refs.items():
        if len({loc['fmtstring'] for loc in locs}) != 1:
            print('\033[31;1m%s\033[m' % uid)

    # regroup the references by source file, ordered by line number
    outbyfile = {}
    for locs in refs.values():
        for loc in locs:
            entry = dict(loc)
            outbyfile.setdefault(entry.pop('file'), []).append(entry)
    for entries in outbyfile.values():
        entries.sort(key=lambda x: x['line'])

    if errors:
        sys.exit(1)

    # write to '<name>.tmp' and rename, so the final file is never partial
    if args.output:
        with open(args.output + '.tmp', 'w') as fd:
            json.dump(xrelfo, fd, indent=2, sort_keys=True)
        os.rename(args.output + '.tmp', args.output)

    if args.out_by_file:
        with open(args.out_by_file + '.tmp', 'w') as fd:
            json.dump(outbyfile, fd, indent=2, sort_keys=True)
        os.rename(args.out_by_file + '.tmp', args.out_by_file)
 # run main() only when this file is executed as a script, not when imported
 if __name__ == '__main__':
    main()