# Source code for wdom.node

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Node related basic interface/classes."""

import html
import logging
from typing import TYPE_CHECKING
from typing import Any, Callable, Iterator, Optional, Sequence, Union

from xml.dom import Node as _Node

if TYPE_CHECKING:
    from typing import List  # noqa
    from wdom.element import Element  # noqa

logger = logging.getLogger(__name__)

[docs]class AbstractNode(_Node): """Abstract Base Class for Node classes.""" # DOM Level 1 nodeType = None nodeName = '' nodeValue = '' # type: Optional[str] # DOM Level 2 namespaceURI = '' prefix = '' # DOM Level 3 baseURI = '' # DOM Level 4 parentElement = None # should escape text contents _should_escape_text = False
class Node(AbstractNode):
    """Base Class for Node interface.

    Keeps an ordered list of child nodes and a reference to the parent node,
    and implements the tree-manipulation methods on top of them. Public
    methods delegate to underscore-prefixed helpers which do the actual work.
    """

    @property
    def connected(self) -> bool:
        """When this instance has any connection, return True."""
        return False

    def __init__(self, parent: AbstractNode = None) -> None:
        """Initialize node object with parent node.

        :param Node parent: parent node. When given, this node is appended
            to it as its last child.
        """
        super().__init__()  # Need to call init in multiple inheritance
        self.__children = list()  # type: List[Node]
        self.__parent = None  # type: Optional[Node]
        if parent:
            parent.appendChild(self)

    def __bool__(self) -> bool:
        """Return always True."""
        # Without this, ``__len__`` would make a childless node falsy.
        return True

    def __len__(self) -> int:
        """Return number of child nodes."""
        return self.length

    def __contains__(self, other: AbstractNode) -> bool:
        """Return True if ``other`` is a direct child of this node."""
        return other in self.__children

    # DOM Level 1
    @property
    def length(self) -> int:
        """Return number of child nodes."""
        return len(self.childNodes)

    @property
    def parentNode(self) -> Optional[AbstractNode]:
        """Return parent node.

        If this node does not have a parent, return ``None``.
        """
        return self.__parent

    @property
    def childNodes(self) -> 'NodeList':
        """Return child nodes of this node.

        Returned object is an instance of NodeList, which is a list like
        object but not support any modification. NodeList is a **live
        object**, which means that changes on this node is reflected to the
        object.
        """
        # The NodeList wraps (does not copy) the internal list.
        return NodeList(self.__children)

    @property
    def firstChild(self) -> Optional[AbstractNode]:
        """Return the first child node.

        If this node does not have any child, return ``None``.
        """
        if self.hasChildNodes():
            return self.childNodes[0]
        return None

    @property
    def lastChild(self) -> Optional[AbstractNode]:
        """Return the last child node.

        If this node does not have any child, return ``None``.
        """
        if self.hasChildNodes():
            return self.childNodes[-1]
        return None

    @property
    def previousSibling(self) -> Optional[AbstractNode]:
        """Return the previous sibling of this node.

        If there is no previous sibling, return ``None``.
        """
        parent = self.parentNode
        if parent is None:
            return None
        siblings = parent.childNodes
        # ``item()`` returns None for index -1, i.e. for the first child.
        return siblings.item(siblings.index(self) - 1)

    @property
    def nextSibling(self) -> Optional[AbstractNode]:
        """Return the next sibling of this node.

        If there is no next sibling, return ``None``.
        """
        parent = self.parentNode
        if parent is None:
            return None
        siblings = parent.childNodes
        # ``item()`` returns None when the index runs past the last child.
        return siblings.item(siblings.index(self) + 1)

    # DOM Level 2
    @property
    def ownerDocument(self) -> Optional[AbstractNode]:
        """Return the owner document of this node.

        Owner document is an ancestor document node of this node. If this
        node (or node tree including this node) is not appended to any
        document node, this property returns ``None``.

        :rtype: Document or None
        """
        if self.nodeType == Node.DOCUMENT_NODE:
            return self
        elif self.parentNode:
            return self.parentNode.ownerDocument
        return None

    # Methods
    def _append_document_fragment(self, node: AbstractNode) -> AbstractNode:
        # Iterate over a snapshot: appending a child removes it from the
        # fragment, which would otherwise mutate the list being iterated.
        for c in tuple(node.childNodes):
            self._append_child(c)
        return node

    def _append_element(self, node: AbstractNode) -> AbstractNode:
        # A node has a single parent; detach it from the old one first.
        if node.parentNode:
            node.parentNode.removeChild(node)
        self.__children.append(node)
        node.__parent = self
        return node

    def _append_child(self, node: AbstractNode) -> AbstractNode:
        if not isinstance(node, Node):
            raise TypeError(
                'appendChild() only accepts a Node instance, but get {}. '
                'If you want to add string or multiple nodes once, '
                'use append() method instead.'.format(type(node)))
        if node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
            return self._append_document_fragment(node)
        return self._append_element(node)

    def appendChild(self, node: AbstractNode) -> AbstractNode:
        """Append the node at the last of this child nodes.

        A document fragment is not appended itself; its children are moved
        into this node instead.

        :raises TypeError: if ``node`` is not a ``Node`` instance.
        """
        return self._append_child(node)

    def index(self, node: AbstractNode) -> int:
        """Return index of the node.

        If the node is not a child of this node, raise ``ValueError``.
        """
        if node in self.childNodes:
            return self.childNodes.index(node)
        elif isinstance(node, Text):
            for i, n in enumerate(self.childNodes):
                # should consider multiple match?
                # NOTE(review): this compares the child's text with ``node``
                # itself, so it only matches if ``node`` compares equal to a
                # string -- confirm whether ``node.data`` was intended.
                if isinstance(n, Text) and n.data == node:
                    return i
        raise ValueError('node is not in this node')

    def _insert_document_fragment_before(self, node: AbstractNode,
                                         ref_node: AbstractNode
                                         ) -> AbstractNode:
        # Snapshot for the same reason as in _append_document_fragment.
        for c in tuple(node.childNodes):
            self._insert_before(c, ref_node)
        return node

    def _insert_element_before(self, node: AbstractNode,
                               ref_node: AbstractNode) -> AbstractNode:
        # Validate the reference before mutating anything, so an invalid
        # ``ref_node`` raises ValueError without detaching ``node`` from its
        # current parent.
        self.index(ref_node)
        if node.parentNode:
            node.parentNode.removeChild(node)
        # Look the position up again: if ``node`` was an earlier child of
        # this same node, detaching it shifted ``ref_node`` by one.
        self.__children.insert(self.index(ref_node), node)
        node.__parent = self
        return node

    def _insert_before(self, node: AbstractNode,
                       ref_node: AbstractNode) -> AbstractNode:
        if not isinstance(node, Node):
            raise TypeError(
                'insertBefore() only accepts a Node instance, but get {}. '
                'If you want to insert string or multiple nodes, '
                'use ref_node.before() instead.'.format(type(node)))
        if node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
            return self._insert_document_fragment_before(node, ref_node)
        return self._insert_element_before(node, ref_node)

    def insertBefore(self, node: AbstractNode,
                     ref_node: AbstractNode) -> AbstractNode:
        """Insert a node just before the reference node.

        :raises TypeError: if ``node`` is not a ``Node`` instance.
        :raises ValueError: if ``ref_node`` is not a child of this node.
        """
        return self._insert_before(node, ref_node)

    def hasChildNodes(self) -> bool:
        """Return True if this node has child nodes, otherwise return False."""
        return bool(self.childNodes)

    def _remove_child(self, node: AbstractNode) -> AbstractNode:
        if node not in self.__children:
            raise ValueError('node to be removed is not a child of this node.')
        self.__children.remove(node)
        node.__parent = None
        return node

    def removeChild(self, node: AbstractNode) -> AbstractNode:
        """Remove a node from this node.

        If node is not a child of this node, raise ``ValueError``.
        """
        return self._remove_child(node)

    def _replace_child(self, new_child: AbstractNode,
                       old_child: AbstractNode) -> AbstractNode:
        # Put the new child in front of the old one, then drop the old one.
        self._insert_before(new_child, old_child)
        return self._remove_child(old_child)

    def replaceChild(self, new_child: AbstractNode,
                     old_child: AbstractNode) -> AbstractNode:
        """Replace an old child with new child.

        Returns the removed ``old_child``.
        """
        return self._replace_child(new_child, old_child)

    def hasAttributes(self) -> bool:
        """Return True if this node has attributes."""
        return hasattr(self, 'attributes') and bool(self.attributes)

    def _clone_node(self) -> 'Node':
        # Shallow clone: a fresh instance of the same class, no children.
        clone = type(self)()
        return clone

    def _clone_node_deep(self) -> 'Node':
        clone = self._clone_node()
        for child in self.childNodes:
            clone.appendChild(child._clone_node_deep())
        return clone

    def cloneNode(self, deep: bool=False) -> AbstractNode:
        """Return new copy of this node.

        If optional argument ``deep`` is specified and is True, new node has
        clones of child nodes of this node (if presents).
        """
        if deep:
            return self._clone_node_deep()
        return self._clone_node()

    __copy__ = _clone_node  # alias

    def __deepcopy__(self, memo: Any) -> 'Node':
        """Return a deep clone of this node; ``memo`` is not used."""
        return self.cloneNode(True)

    def _empty(self) -> None:
        # Snapshot the list because _remove_child mutates it.
        for child in tuple(self.__children):
            self._remove_child(child)

    def empty(self) -> None:
        """[Not Standard] Remove all child nodes from this node.

        This is equivalent to ``node.textContent = ''``.
        """
        self._empty()

    def _get_text_content(self) -> str:
        return ''.join(child.textContent for child in self.childNodes)

    def _set_text_content(self, value: str) -> None:
        self._empty()
        # An empty value just clears the node; no empty Text node is added.
        if value:
            self._append_child(Text(value))

    @property
    def textContent(self) -> str:
        """Return text contents of this node and all child nodes.

        When any value is set to this property, all child nodes are removed
        and new value is set as a text node.
        """
        return self._get_text_content()

    @textContent.setter
    def textContent(self, value: str) -> None:
        """Remove all child nodes and set new text."""
        self._set_text_content(value)
class NodeList(Sequence[Node]):
    """Collection of Node objects.

    Read-only, list-like wrapper. The wrapped sequence is stored by
    reference, so later changes to it are visible through this object.
    """

    def __init__(self, nodes: Sequence[Node]) -> None:
        """Initialize NodeList by iterable `nodes`."""
        self.__nodes = nodes

    def __getitem__(self, index: int) -> Node:  # type: ignore
        """Get `index`-th node."""
        return self.__nodes[index]

    def __len__(self) -> int:
        """Return number of nodes in this list."""
        return len(self.__nodes)

    def __contains__(self, other: object) -> bool:
        """Return True if ``other`` is in this list."""
        return other in self.__nodes

    def __iter__(self) -> Iterator[Node]:
        """Iterate over the nodes in order."""
        yield from self.__nodes

    @property
    def length(self) -> int:
        """Return number of nodes in this list."""
        return len(self)

    def item(self, index: int) -> Optional[Node]:
        """Return item with the index.

        If the index is negative number or out of the list, return None.

        :raises TypeError: if ``index`` is not an integer.
        """
        if not isinstance(index, int):
            raise TypeError(
                'Indices must be integer, not {}'.format(type(index)))
        # Unlike ``__getitem__``, negative indices do not wrap around here.
        return self.__nodes[index] if 0 <= index < self.length else None

    def index(self, node: Node) -> int:  # type: ignore
        """Get index of the node.

        :raises ValueError: if ``node`` is not in this list.
        """
        return self.__nodes.index(node)
class HTMLCollection(NodeList):
    """Collection of HTML elements."""

    def namedItem(self, name: str) -> Optional[Node]:
        """Return the first node whose ``id`` or ``name`` equals ``name``.

        All nodes are checked by ``id`` attribute first; only when none of
        them matches are they checked by ``name`` attribute. Return ``None``
        if nothing matches.
        """
        for n in self:
            if n.getAttribute('id') == name:
                return n
        for n in self:
            if n.getAttribute('name') == name:
                return n
        return None


def _ensure_node(node: Union[str, AbstractNode]) -> AbstractNode:
    """Ensure to be node.

    If ``node`` is string, convert it to ``Text`` node.

    :raises TypeError: if ``node`` is neither a string nor a ``Node``.
    """
    if isinstance(node, str):
        return Text(node)
    elif isinstance(node, Node):
        return node
    else:
        raise TypeError('Invalid type to append: {}'.format(node))


def _to_node_list(nodes: Sequence[Union[str, AbstractNode]]) -> AbstractNode:
    """Convert ``nodes`` into a single node which can be inserted at once.

    A single item is returned as a node directly. Any other number of items
    is collected into a new ``DocumentFragment``.
    """
    if len(nodes) == 1:
        return _ensure_node(nodes[0])
    df = DocumentFragment()
    for n in nodes:
        df.appendChild(_ensure_node(n))
    return df
class ParentNode(AbstractNode):
    """Mixin class for Node classes which can have child nodes.

    This class is inherited by Document, DocumentFragment, and Element class.
    """

    @property
    def children(self) -> NodeList:
        """Return list of element child nodes.

        Currently this is not a live object.
        """
        return NodeList([e for e in self.childNodes
                         if e.nodeType == Node.ELEMENT_NODE])

    @property
    def firstElementChild(self) -> Optional[AbstractNode]:
        """First Element child node.

        If this node has no element child, return None.
        """
        for child in self.childNodes:
            if child.nodeType == Node.ELEMENT_NODE:
                return child
        return None

    @property
    def lastElementChild(self) -> Optional[AbstractNode]:
        """Last Element child node.

        If this node has no element child, return None.
        """
        for child in reversed(self.childNodes):  # type: ignore
            if child.nodeType == Node.ELEMENT_NODE:
                return child
        return None

    def prepend(self, *nodes: Union[str, AbstractNode]) -> None:
        """Insert new nodes before first child node.

        Strings are converted to Text nodes.
        """
        node = _to_node_list(nodes)
        if self.firstChild:
            self.insertBefore(node, self.firstChild)
        else:
            # No child to insert before; appending gives the same result.
            self.appendChild(node)

    def append(self, *nodes: Union[AbstractNode, str]) -> None:
        """Append new nodes after last child node.

        Strings are converted to Text nodes.
        """
        node = _to_node_list(nodes)
        self.appendChild(node)

    def getElementsBy(self, cond: Callable[[AbstractNode], bool]) -> NodeList:
        """Return list of child nodes which matches ``cond``.

        ``cond`` must be a function which gets a single argument ``Element``,
        and returns bool. If the node matches requested condition, ``cond``
        should return True.
        This searches all child nodes recursively.

        :arg cond: Callable[[Element], bool]
        :rtype: NodeList[Element]
        """
        elements = []
        for child in self.children:
            if cond(child):
                elements.append(child)
            # Descend regardless of whether the child itself matched.
            elements.extend(child.getElementsBy(cond))
        return NodeList(elements)

    def getElementsByTagName(self, tag: str) -> NodeList:
        """Get child nodes which tag name is ``tag``.

        ``tag`` is upper-cased before it is compared with ``tagName``.
        """
        _tag = tag.upper()
        return self.getElementsBy(lambda n: getattr(n, 'tagName') == _tag)

    def getElementsByClassName(self, class_name: str) -> NodeList:
        """Get child nodes which has ``class_name`` class attribute."""
        return self.getElementsBy(
            lambda node: class_name in getattr(node, 'classList'))

    def query(self, relativeSelectors: str) -> AbstractNode:
        """Not Implemented."""
        raise NotImplementedError

    def queryAll(self, relativeSelectors: str) -> NodeList:
        """Not Implemented."""
        raise NotImplementedError

    def querySelector(self, selectors: str) -> AbstractNode:
        """Not Implemented."""
        raise NotImplementedError

    def querySelectorAll(self, selectors: str) -> NodeList:
        """Not Implemented."""
        raise NotImplementedError
class NonDocumentTypeChildNode(AbstractNode):
    """Mixin class for ``CharacterData`` and ``DocumentType`` class."""

    @property
    def previousElementSibling(self) -> Optional[AbstractNode]:
        """Previous Element Node.

        If this node has no previous element node, return None.
        """
        if self.parentNode is None:
            return None
        siblings = self.parentNode.childNodes
        # Walk backwards from the sibling just before this node.
        for i in range(siblings.index(self), 0, -1):
            n = siblings[i-1]
            if n.nodeType == Node.ELEMENT_NODE:
                return n
        return None

    @property
    def nextElementSibling(self) -> Optional[AbstractNode]:
        """Next Element Node.

        If this node has no next element node, return None.
        """
        if self.parentNode is None:
            return None
        siblings = self.parentNode.childNodes
        # Walk forwards from the sibling just after this node.
        for i in range(siblings.index(self) + 1, len(siblings)):
            n = siblings[i]
            if n.nodeType == Node.ELEMENT_NODE:
                return n
        return None
class ChildNode(AbstractNode):
    """Mixin class for Node classes which can have parent node.

    This class is inherited by DocumentType, Element, and CharacterData
    (super class of Text, Comment, and RawHtml) classes.

    Every method here does nothing when this node has no parent.
    """

    def before(self, *nodes: Union[AbstractNode, str]) -> None:
        """Insert nodes before this node.

        If nodes contains ``str``, it will be converted to Text node.
        """
        if self.parentNode:
            node = _to_node_list(nodes)
            self.parentNode.insertBefore(node, self)  # type: ignore

    def after(self, *nodes: Union[AbstractNode, str]) -> None:
        """Append nodes after this node.

        If nodes contains ``str``, it will be converted to Text node.
        """
        if self.parentNode:
            node = _to_node_list(nodes)
            _next_node = self.nextSibling
            if _next_node is None:
                # This node is the last child, so appending puts the new
                # nodes right after it.
                self.parentNode.appendChild(node)
            else:
                self.parentNode.insertBefore(node, _next_node)

    def replaceWith(self, *nodes: Union[AbstractNode, str]) -> None:
        """Replace this node with nodes.

        If nodes contains ``str``, it will be converted to Text node.
        """
        if self.parentNode:
            node = _to_node_list(nodes)
            self.parentNode.replaceChild(node, self)  # type: ignore

    def _remove(self) -> None:
        if self.parentNode:
            self.parentNode.removeChild(self)  # type: ignore

    def remove(self) -> None:
        """Remove this node from the parent node."""
        self._remove()
class CharacterData(Node, ChildNode, NonDocumentTypeChildNode):
    """Abstract class for classes which wraps text data.

    This class is a super class of ``Text`` and ``Comment``.

    The wrapped text is kept in the ``data`` attribute. Nodes of this kind
    cannot have children, so the child-manipulation methods raise
    ``NotImplementedError``.
    """

    # DOM Level 1
    firstChild = None
    lastChild = None
    specified = False

    def __init__(self, text: str='', parent: Node = None) -> None:  # noqa
        """Initialize with ``text`` and optionally append to ``parent``."""
        # Set the text before calling the base initializer: that call may
        # append this node to ``parent``, and code reacting to the append
        # must not see a node without ``data``.
        self.data = text
        super().__init__(parent=parent)

    def _clone_node(self) -> 'CharacterData':
        clone = type(self)(self.data)
        return clone

    @property
    def html(self) -> str:
        """Return html representation of this node."""
        return self.textContent

    def _get_text_content(self) -> str:
        return self.data

    def _set_text_content(self, value: str) -> None:
        self.data = value

    def __len__(self) -> int:
        """Return length of content."""
        return len(self.data)

    @property
    def length(self) -> int:
        """Return length of content."""
        return len(self)

    def _append_data(self, string: str) -> None:
        self.data += string

    def appendData(self, string: str) -> None:
        """Add ``string`` to end of this node."""
        self._append_data(string)

    def _insert_data(self, offset: int, string: str) -> None:
        self.data = ''.join((self.data[:offset], string, self.data[offset:]))

    def insertData(self, offset: int, string: str) -> None:
        """Insert ``string`` at offset on this node."""
        self._insert_data(offset, string)

    def _delete_data(self, offset: int, count: int) -> None:
        self.data = ''.join((self.data[:offset], self.data[offset+count:]))

    def deleteData(self, offset: int, count: int) -> None:
        """Delete ``count`` letters starting at ``offset``."""
        self._delete_data(offset, count)

    def _replace_data(self, offset: int, count: int, string: str) -> None:
        self.data = ''.join(
            (self.data[:offset], string, self.data[offset+count:]))

    def replaceData(self, offset: int, count: int, string: str) -> None:
        """Replace ``count`` letters starting at ``offset`` by ``string``."""
        self._replace_data(offset, count, string)

    @property
    def childNodes(self) -> NodeList:
        """Return child nodes.

        This node can't have child, so return empty ``NodeList`` object.
        """
        return NodeList([])

    # Methods
    def appendChild(self, node: Node) -> Node:
        """Not supported."""
        raise NotImplementedError('This node does not support this method.')

    def insertBefore(self, node: Node, ref_node: Node) -> Node:
        """Not supported."""
        raise NotImplementedError('This node does not support this method.')

    def hasChildNodes(self) -> bool:
        """Return false."""
        return False

    def removeChild(self, node: Node) -> Node:
        """Not supported."""
        raise NotImplementedError('This node does not support this method.')

    def replaceChild(self, new_child: Node, old_child: Node) -> Node:
        """Not supported."""
        raise NotImplementedError('This node does not support this method.')

    def hasAttributes(self) -> bool:
        """Return false."""
        return False
class Text(CharacterData):
    """Node class to wrap text contents."""

    nodeType = Node.TEXT_NODE
    nodeName = '#text'

    @property
    def html(self) -> str:
        """Return html-escaped string representation of this node.

        The text is escaped only when this node has a parent whose
        ``_should_escape_text`` flag is set; otherwise it is returned as is.
        """
        if self.parentNode and self.parentNode._should_escape_text:
            return html.escape(self.data)
        return self.data
class RawHtml(Text):
    """Very similar to ``Text`` class, but contents are always not escaped.

    This node is [NOT DOM Standard].
    """

    @property
    def html(self) -> str:
        """Return html representation (the raw text, never escaped)."""
        return self.data
class Comment(CharacterData):
    """Comment node class."""

    nodeType = Node.COMMENT_NODE
    nodeName = '#comment'

    @property
    def html(self) -> str:
        """Return html representation: the text wrapped in ``<!--``/``-->``."""
        return ''.join(('<!--', self.data, '-->'))
class DocumentType(Node, NonDocumentTypeChildNode):
    """DocumentType node class."""

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    textContent = None  # type: ignore
    _should_escape_text = True

    def _clone_node(self) -> 'DocumentType':
        clone = type(self)(self.name)
        return clone

    @property
    def nodeName(self) -> str:  # type: ignore
        """Return node name (=type)."""
        return self.name

    def __init__(self, type: str = 'html', parent: Node = None) -> None:
        """Initialize DocumentType node with `type` doctype."""
        # Store the name before calling the base initializer: that call may
        # append this node to ``parent``, and code reacting to the append
        # must not see a doctype without a name.
        self.__type = type
        super().__init__(parent=parent)

    @property
    def name(self) -> str:
        """Return node type."""
        return self.__type

    @name.setter
    def name(self, name: str) -> None:
        """Set node type."""
        self.__type = name

    @property
    def html(self) -> str:
        """Return html representation."""
        return '<!DOCTYPE {}>'.format(self.name)
class DocumentFragment(Node, ParentNode):
    """DocumentFragment node class."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = '#document-fragment'
    # These plain attributes replace the properties inherited from ``Node``,
    # so a fragment always reports no parent and no siblings.
    parentNode = None
    previousSibling = None
    nextSibling = None
    _should_escape_text = True

    @property
    def html(self) -> str:
        """Return html representation: the children's html concatenated."""
        return ''.join(child.html for child in self.childNodes)