Debug: remove lxml dependency, pickle SVD tree for faster processing. (#909)

* Debug: remove lxml dependency, pickle SVD tree for faster processing.
* Debug: remove unused import in svd.py
あく 2021-12-17 04:28:51 +03:00 committed by GitHub
parent 4013da5b59
commit e109e2e3e8
6 changed files with 624 additions and 35 deletions
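
The core of the change: instead of re-parsing the SVD XML with lxml on every load, the parsed tree is pickled next to the XML file and reused on later loads. A minimal standalone sketch of that caching pattern (the function name and paths are illustrative; the real logic lives in SVDFile.__init__ in the diff below):

import os
import pickle

import x2d  # vendored XML-to-dict parser added by this commit (a fork of xmltodict)


def load_svd_tree(xml_path):
    """Parse an SVD file once, then cache the result as <file>.pickle."""
    pickle_path = xml_path + ".pickle"
    if os.path.exists(pickle_path):
        # Fast path: reuse the previously parsed tree.
        with open(pickle_path, "rb") as f:
            return pickle.load(f)
    # Slow path: parse the XML, then cache the tree for the next run.
    with open(xml_path, "rb") as f:
        tree = x2d.parse(f)
    with open(pickle_path, "wb") as f:
        pickle.dump(tree, f, pickle.HIGHEST_PROTOCOL)
    return tree

Deleting the .pickle file (now covered by the new .gitignore entry) forces a fresh parse.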

.gitignore
View File

@@ -19,6 +19,7 @@ venv/
__pycache__/
*.py[cod]
*$py.class
*.pickle
.obj/
bindings/

View File

@@ -16,7 +16,7 @@ My implementation so far has only tested STM32 chips but should hold for others.
expect plenty of errors in the file. Like GPIOA having a register named GPIOB_OSPEEDR and lots of 16-bit registers
that are listed as 32!
The implementation consists of two components -- An lxml-based parser module (pysvd) and a GDB file (gdb_svd).
The implementation consists of two components -- an XML parser module (pysvd) and a GDB file (gdb_svd).
I haven't yet worked out a perfect workflow for this, though it's quite easy to use when
you already tend to have a GDB initialization file for starting up OpenOCD and the like.
However your workflow works, just make sure to, in GDB:
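
(The README's actual GDB instructions are cut off at this hunk boundary. Purely as a hypothetical illustration, a session with this plugin tends to look like the sketch below; command names are inferred from the LoadSVD gdb.Command class later in this diff, not taken from the README.)

# Illustrative .gdbinit sketch -- names are assumptions, not from the README
source gdb_svd.py          # register the SVD commands with GDB
target extended-remote :3333
svd_load STM32F405.svd     # parse the SVD (now cached as a .pickle on first load)
svd GPIOA                  # browse a peripheral's registers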

View File

@@ -16,7 +16,6 @@ You should have received a copy of the GNU General Public License
along with PyCortexMDebug. If not, see <http://www.gnu.org/licenses/>.
"""
import lxml.objectify as objectify
import sys
from collections import OrderedDict
import os
@@ -24,6 +23,7 @@ import pickle
import traceback
import re
import warnings
import x2d
class SmartDict:
@@ -126,26 +126,31 @@ class SVDFile:
def __init__(self, fname):
"""
Args:
fname: Filename for the SVD file
"""
f = objectify.parse(os.path.expanduser(fname))
root = f.getroot()
periph = root.peripherals.getchildren()
self.peripherals = SmartDict()
self.base_address = 0
# XML elements
for p in periph:
try:
if p.tag == "peripheral":
self.peripherals[str(p.name)] = SVDPeripheral(p, self)
xml_file_name = os.path.expanduser(fname)
pickle_file_name = xml_file_name + ".pickle"
root = None
if os.path.exists(pickle_file_name):
print("Loading pickled SVD")
root = pickle.load(open(pickle_file_name, "rb"))
else:
# This is some other tag
pass
print("Loading XML SVD and pickling it")
root = x2d.parse(open(xml_file_name, "rb"))
pickle.dump(root, open(pickle_file_name, "wb"), pickle.HIGHEST_PROTOCOL)
print("Processing SVD tree")
# XML elements
for p in root["device"]["peripherals"]["peripheral"]:
try:
self.peripherals[p["name"]] = SVDPeripheral(p, self)
except SVDNonFatalError as e:
print(e)
# print(e)
pass
print("SVD Ready")
def add_register(parent, node):
@@ -265,11 +270,11 @@ class SVDPeripheral:
self.parent_base_address = parent.base_address
# Look for a base address, as it is required
if not hasattr(svd_elem, "baseAddress"):
if "baseAddress" not in svd_elem:
raise SVDNonFatalError("Periph without base address")
self.base_address = int(str(svd_elem.baseAddress), 0)
if "derivedFrom" in svd_elem.attrib:
derived_from = svd_elem.attrib["derivedFrom"]
if "@derivedFrom" in svd_elem:
derived_from = svd_elem["@derivedFrom"]
try:
self.name = str(svd_elem.name)
except AttributeError:
@@ -295,16 +300,14 @@ class SVDPeripheral:
self.clusters = SmartDict()
if hasattr(svd_elem, "registers"):
registers = [
r
for r in svd_elem.registers.getchildren()
if r.tag in ["cluster", "register"]
]
for r in registers:
if r.tag == "cluster":
add_cluster(self, r)
elif r.tag == "register":
if "register" in svd_elem.registers:
for r in svd_elem.registers.register:
if isinstance(r, x2d.ObjectDict):
add_register(self, r)
if "cluster" in svd_elem.registers:
for c in svd_elem.registers.cluster:
if isinstance(c, x2d.ObjectDict):
add_cluster(self, c)
def refactor_parent(self, parent):
self.parent_base_address = parent.base_address
@@ -338,10 +341,10 @@ class SVDPeripheralRegister:
else:
self.size = 0x20
self.fields = SmartDict()
if hasattr(svd_elem, "fields"):
if "fields" in svd_elem:
# Filter fields to only consider those of tag "field"
fields = [f for f in svd_elem.fields.getchildren() if f.tag == "field"]
for f in fields:
for f in svd_elem.fields.field:
if isinstance(f, x2d.ObjectDict):
self.fields[str(f.name)] = SVDPeripheralRegisterField(f, self)
def refactor_parent(self, parent):

View File

@@ -23,6 +23,7 @@ import sys
import struct
import pkg_resources
import fnmatch
import traceback
from .svd import SVDFile
@@ -99,6 +100,7 @@ class LoadSVD(gdb.Command):
try:
SVD(SVDFile(f))
except Exception as e:
traceback.print_exc()
raise gdb.GdbError("Could not load SVD file {} : {}...\n".format(f, e))

View File

@@ -0,0 +1,586 @@
#!/usr/bin/env python
"Makes working with XML feel like you are working with JSON"
try:
from defusedexpat import pyexpat as expat
except ImportError:
from xml.parsers import expat
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
try: # pragma no cover
from cStringIO import StringIO
except ImportError: # pragma no cover
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
from inspect import isgenerator
class ObjectDict(dict):
def __getattr__(self, name):
if name in self:
return self[name]
else:
raise AttributeError("No such attribute: " + name)
try: # pragma no cover
_basestring = basestring
except NameError: # pragma no cover
_basestring = str
try: # pragma no cover
_unicode = unicode
except NameError: # pragma no cover
_unicode = str
__author__ = "Martin Blech"
__version__ = "0.12.0"
__license__ = "MIT"
class ParsingInterrupted(Exception):
pass
class _DictSAXHandler(object):
def __init__(
self,
item_depth=0,
item_callback=lambda *args: True,
xml_attribs=True,
attr_prefix="@",
cdata_key="#text",
force_cdata=False,
cdata_separator="",
postprocessor=None,
dict_constructor=ObjectDict,
strip_whitespace=True,
namespace_separator=":",
namespaces=None,
force_list=None,
comment_key="#comment",
):
self.path = []
self.stack = []
self.data = []
self.item = None
self.item_depth = item_depth
self.xml_attribs = xml_attribs
self.item_callback = item_callback
self.attr_prefix = attr_prefix
self.cdata_key = cdata_key
self.force_cdata = force_cdata
self.cdata_separator = cdata_separator
self.postprocessor = postprocessor
self.dict_constructor = dict_constructor
self.strip_whitespace = strip_whitespace
self.namespace_separator = namespace_separator
self.namespaces = namespaces
self.namespace_declarations = ObjectDict()
self.force_list = force_list
self.comment_key = comment_key
def _build_name(self, full_name):
if self.namespaces is None:
return full_name
i = full_name.rfind(self.namespace_separator)
if i == -1:
return full_name
namespace, name = full_name[:i], full_name[i + 1 :]
try:
short_namespace = self.namespaces[namespace]
except KeyError:
short_namespace = namespace
if not short_namespace:
return name
else:
return self.namespace_separator.join((short_namespace, name))
def _attrs_to_dict(self, attrs):
if isinstance(attrs, dict):
return attrs
return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))
def startNamespaceDecl(self, prefix, uri):
self.namespace_declarations[prefix or ""] = uri
def startElement(self, full_name, attrs):
name = self._build_name(full_name)
attrs = self._attrs_to_dict(attrs)
if attrs and self.namespace_declarations:
attrs["xmlns"] = self.namespace_declarations
self.namespace_declarations = ObjectDict()
self.path.append((name, attrs or None))
if len(self.path) > self.item_depth:
self.stack.append((self.item, self.data))
if self.xml_attribs:
attr_entries = []
for key, value in attrs.items():
key = self.attr_prefix + self._build_name(key)
if self.postprocessor:
entry = self.postprocessor(self.path, key, value)
else:
entry = (key, value)
if entry:
attr_entries.append(entry)
attrs = self.dict_constructor(attr_entries)
else:
attrs = None
self.item = attrs or None
self.data = []
def endElement(self, full_name):
name = self._build_name(full_name)
if len(self.path) == self.item_depth:
item = self.item
if item is None:
item = None if not self.data else self.cdata_separator.join(self.data)
should_continue = self.item_callback(self.path, item)
if not should_continue:
raise ParsingInterrupted()
if len(self.stack):
data = None if not self.data else self.cdata_separator.join(self.data)
item = self.item
self.item, self.data = self.stack.pop()
if self.strip_whitespace and data:
data = data.strip() or None
if data and self.force_cdata and item is None:
item = self.dict_constructor()
if item is not None:
if data:
self.push_data(item, self.cdata_key, data)
self.item = self.push_data(self.item, name, item)
else:
self.item = self.push_data(self.item, name, data)
else:
self.item = None
self.data = []
self.path.pop()
def characters(self, data):
if not self.data:
self.data = [data]
else:
self.data.append(data)
def comments(self, data):
if self.strip_whitespace:
data = data.strip()
self.item = self.push_data(self.item, self.comment_key, data)
def push_data(self, item, key, data):
if self.postprocessor is not None:
result = self.postprocessor(self.path, key, data)
if result is None:
return item
key, data = result
if item is None:
item = self.dict_constructor()
try:
value = item[key]
if isinstance(value, list):
value.append(data)
else:
item[key] = [value, data]
except KeyError:
if self._should_force_list(key, data):
item[key] = [data]
else:
item[key] = data
return item
def _should_force_list(self, key, value):
if not self.force_list:
return False
if isinstance(self.force_list, bool):
return self.force_list
try:
return key in self.force_list
except TypeError:
return self.force_list(self.path[:-1], key, value)
def parse(
xml_input,
encoding=None,
expat=expat,
process_namespaces=False,
namespace_separator=":",
disable_entities=True,
process_comments=False,
**kwargs
):
"""Parse the given XML input and convert it into a dictionary.
`xml_input` can either be a `string`, a file-like object, or a generator of strings.
If `xml_attribs` is `True`, element attributes are put in the dictionary
among regular child elements, using `@` as a prefix to avoid collisions. If
set to `False`, they are just ignored.
Simple example::
>>> import xmltodict
>>> doc = xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>
... \"\"\")
>>> doc['a']['@prop']
u'x'
>>> doc['a']['b']
[u'1', u'2']
If `item_depth` is `0`, the function returns a dictionary for the root
element (default behavior). Otherwise, it calls `item_callback` every time
an item at the specified depth is found and returns `None` in the end
(streaming mode).
The callback function receives two parameters: the `path` from the document
root to the item (name-attribs pairs), and the `item` (dict). If the
callback's return value is false-ish, parsing will be stopped with the
:class:`ParsingInterrupted` exception.
Streaming example::
>>> def handle(path, item):
... print('path:%s item:%s' % (path, item))
... return True
...
>>> xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>\"\"\", item_depth=2, item_callback=handle)
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
The optional argument `postprocessor` is a function that takes `path`,
`key` and `value` as positional arguments and returns a new `(key, value)`
pair where both `key` and `value` may have changed. Usage example::
>>> def postprocessor(path, key, value):
... try:
... return key + ':int', int(value)
... except (ValueError, TypeError):
... return key, value
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
... postprocessor=postprocessor)
ObjectDict([(u'a', ObjectDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
You can pass an alternate version of `expat` (such as `defusedexpat`) by
using the `expat` parameter. E.g:
>>> import defusedexpat
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
ObjectDict([(u'a', u'hello')])
You can use the force_list argument to force lists to be created even
when there is only a single child of a given level of hierarchy. The
force_list argument is a tuple of keys. If the key for a given level
of hierarchy is in the force_list argument, that level of hierarchy
will have a list as a child (even if there is only one sub-element).
The index_keys operation takes precedence over this. This is applied
after any user-supplied postprocessor has already run.
For example, given this input:
<servers>
<server>
<name>host1</name>
<os>Linux</os>
<interfaces>
<interface>
<name>em0</name>
<ip_address>10.0.0.1</ip_address>
</interface>
</interfaces>
</server>
</servers>
If called with force_list=('interface',), it will produce
this dictionary:
{'servers':
  {'server':
    {'name': 'host1',
     'os': 'Linux',
     'interfaces':
      {'interface':
        [ {'name': 'em0', 'ip_address': '10.0.0.1' } ] } } } }
`force_list` can also be a callable that receives `path`, `key` and
`value`. This is helpful in cases where the logic that decides whether
a list should be forced is more complex.
If `process_comments` is `True` then comments will be added with `comment_key`
(default `'#comment'`) to the tag which contains the comment.
For example, given this input:
<a>
<b>
<!-- b comment -->
<c>
<!-- c comment -->
1
</c>
<d>2</d>
</b>
</a>
If called with process_comment=True, it will produce
this dictionary:
'a': {
'b': {
'#comment': 'b comment',
'c': {
'#comment': 'c comment',
'#text': '1',
},
'd': '2',
},
}
"""
handler = _DictSAXHandler(namespace_separator=namespace_separator, **kwargs)
if isinstance(xml_input, _unicode):
if not encoding:
encoding = "utf-8"
xml_input = xml_input.encode(encoding)
if not process_namespaces:
namespace_separator = None
parser = expat.ParserCreate(encoding, namespace_separator)
try:
parser.ordered_attributes = True
except AttributeError:
# Jython's expat does not support ordered_attributes
pass
parser.StartNamespaceDeclHandler = handler.startNamespaceDecl
parser.StartElementHandler = handler.startElement
parser.EndElementHandler = handler.endElement
parser.CharacterDataHandler = handler.characters
if process_comments:
parser.CommentHandler = handler.comments
parser.buffer_text = True
if disable_entities:
try:
# Attempt to disable DTD in Jython's expat parser (Xerces-J).
feature = "http://apache.org/xml/features/disallow-doctype-decl"
parser._reader.setFeature(feature, True)
except AttributeError:
# For CPython / expat parser.
# Anything not handled ends up here and entities aren't expanded.
parser.DefaultHandler = lambda x: None
# Expects an integer return; zero means failure -> expat.ExpatError.
parser.ExternalEntityRefHandler = lambda *x: 1
if hasattr(xml_input, "read"):
parser.ParseFile(xml_input)
elif isgenerator(xml_input):
for chunk in xml_input:
parser.Parse(chunk, False)
parser.Parse(b"", True)
else:
parser.Parse(xml_input, True)
return handler.item
def _process_namespace(name, namespaces, ns_sep=":", attr_prefix="@"):
if not namespaces:
return name
try:
ns, name = name.rsplit(ns_sep, 1)
except ValueError:
pass
else:
ns_res = namespaces.get(ns.strip(attr_prefix))
name = (
"{}{}{}{}".format(
attr_prefix if ns.startswith(attr_prefix) else "", ns_res, ns_sep, name
)
if ns_res
else name
)
return name
def _emit(
key,
value,
content_handler,
attr_prefix="@",
cdata_key="#text",
depth=0,
preprocessor=None,
pretty=False,
newl="\n",
indent="\t",
namespace_separator=":",
namespaces=None,
full_document=True,
expand_iter=None,
):
key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
if preprocessor is not None:
result = preprocessor(key, value)
if result is None:
return
key, value = result
if (
not hasattr(value, "__iter__")
or isinstance(value, _basestring)
or isinstance(value, dict)
):
value = [value]
for index, v in enumerate(value):
if full_document and depth == 0 and index > 0:
raise ValueError("document with multiple roots")
if v is None:
v = ObjectDict()
elif isinstance(v, bool):
if v:
v = _unicode("true")
else:
v = _unicode("false")
elif not isinstance(v, dict):
if (
expand_iter
and hasattr(v, "__iter__")
and not isinstance(v, _basestring)
):
v = ObjectDict(((expand_iter, v),))
else:
v = _unicode(v)
if isinstance(v, _basestring):
v = ObjectDict(((cdata_key, v),))
cdata = None
attrs = ObjectDict()
children = []
for ik, iv in v.items():
if ik == cdata_key:
cdata = iv
continue
if ik.startswith(attr_prefix):
ik = _process_namespace(
ik, namespaces, namespace_separator, attr_prefix
)
if ik == "@xmlns" and isinstance(iv, dict):
for k, v in iv.items():
attr = "xmlns{}".format(":{}".format(k) if k else "")
attrs[attr] = _unicode(v)
continue
if not isinstance(iv, _unicode):
iv = _unicode(iv)
attrs[ik[len(attr_prefix) :]] = iv
continue
children.append((ik, iv))
if pretty:
content_handler.ignorableWhitespace(depth * indent)
content_handler.startElement(key, AttributesImpl(attrs))
if pretty and children:
content_handler.ignorableWhitespace(newl)
for child_key, child_value in children:
_emit(
child_key,
child_value,
content_handler,
attr_prefix,
cdata_key,
depth + 1,
preprocessor,
pretty,
newl,
indent,
namespaces=namespaces,
namespace_separator=namespace_separator,
expand_iter=expand_iter,
)
if cdata is not None:
content_handler.characters(cdata)
if pretty and children:
content_handler.ignorableWhitespace(depth * indent)
content_handler.endElement(key)
if pretty and depth:
content_handler.ignorableWhitespace(newl)
def unparse(
input_dict,
output=None,
encoding="utf-8",
full_document=True,
short_empty_elements=False,
**kwargs
):
"""Emit an XML document for the given `input_dict` (reverse of `parse`).
The resulting XML document is returned as a string, but if `output` (a
file-like object) is specified, it is written there instead.
Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
as XML node attributes, whereas keys equal to `cdata_key`
(default=`'#text'`) are treated as character data.
The `pretty` parameter (default=`False`) enables pretty-printing. In this
mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
can be customized with the `newl` and `indent` parameters.
"""
if full_document and len(input_dict) != 1:
raise ValueError("Document must have exactly one root.")
must_return = False
if output is None:
output = StringIO()
must_return = True
if short_empty_elements:
content_handler = XMLGenerator(output, encoding, True)
else:
content_handler = XMLGenerator(output, encoding)
if full_document:
content_handler.startDocument()
for key, value in input_dict.items():
_emit(key, value, content_handler, full_document=full_document, **kwargs)
if full_document:
content_handler.endDocument()
if must_return:
value = output.getvalue()
try: # pragma no cover
value = value.decode(encoding)
except AttributeError: # pragma no cover
pass
return value
if __name__ == "__main__": # pragma: no cover
import sys
import marshal
try:
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
except AttributeError:
stdin = sys.stdin
stdout = sys.stdout
(item_depth,) = sys.argv[1:]
item_depth = int(item_depth)
def handle_item(path, item):
marshal.dump((path, item), stdout)
return True
try:
root = parse(
stdin,
item_depth=item_depth,
item_callback=handle_item,
dict_constructor=dict,
)
if item_depth == 0:
handle_item([], root)
except KeyboardInterrupt:
pass
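
For readers unfamiliar with this module: it is a vendored copy of Martin Blech's xmltodict 0.12.0 with the plain dict swapped for ObjectDict, so parsed elements support attribute access. A minimal sketch of how svd.py consumes it (the SVD snippet and values are made up for illustration, and it assumes x2d.py sits on the import path, as it does for svd.py in this commit):

import x2d

# Parse a tiny SVD-like snippet into nested ObjectDicts.
doc = x2d.parse(
    '<device><peripherals>'
    '<peripheral derivedFrom="GPIOA">'
    '<name>GPIOB</name><baseAddress>0x40020400</baseAddress>'
    '</peripheral></peripherals></device>'
)

periph = doc["device"]["peripherals"]["peripheral"]
print(periph.name)                      # ObjectDict attribute access -> GPIOB
print(periph["@derivedFrom"])           # XML attributes get an "@" prefix -> GPIOA
print(int(str(periph.baseAddress), 0))  # same int(str(...), 0) idiom as svd.py -> 1073873920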

View File

@@ -25,9 +25,6 @@ RUN wget --progress=dot:giga "https://developer.arm.com/-/media/Files/downloads/
for file in * ; do ln -s "${PWD}/${file}" "/usr/bin/${file}" ; done && \
cd / && arm-none-eabi-gcc -v && arm-none-eabi-gdb -v
RUN wget --progress=dot:giga -O - https://bootstrap.pypa.io/pip/2.7/get-pip.py | python2 && \
pip install --no-cache-dir lxml==4.6.3
RUN git clone --depth 1 --branch v0.4.1 https://github.com/atomicobject/heatshrink.git && \
cd heatshrink && make && mv ./heatshrink /usr/local/bin/heatshrink