|
@@ -14,6 +14,24 @@ def normalize_whitespace(string):
|
|
|
return ASCII_WHITESPACE.sub(' ', string)
|
|
|
|
|
|
|
|
|
def normalize_attributes(attributes):
    """
    Return a new list of ``(name, value)`` pairs in canonical form.

    ``attributes`` is an iterable of ``(name, value)`` pairs. The input is
    not modified and the order of the pairs is preserved.

    Two normalizations are applied to each pair:

    * A non-empty ``class`` value is split on ``ASCII_WHITESPACE``, empty
      pieces are dropped, and the remaining class names are sorted and
      re-joined with single spaces.
    * A falsy value, or a value equal to the attribute's own name, is
      replaced by ``None``. This check is applied to every attribute name.
    """
    result = []
    for attr_name, attr_value in attributes:
        if attr_name == 'class' and attr_value:
            # Order the individual class names so that comparing DOM
            # instances does not depend on how the classes were written.
            class_names = [
                token for token in ASCII_WHITESPACE.split(attr_value) if token
            ]
            class_names.sort()
            attr_value = ' '.join(class_names)
        # An attribute without a value is the same as an attribute whose
        # value equals the attribute's name:
        # <input checked> == <input checked="checked">
        if attr_value and attr_value != attr_name:
            result.append((attr_name, attr_value))
        else:
            result.append((attr_name, None))
    return result
|
|
|
+
|
|
|
+
|
|
|
class Element:
|
|
|
def __init__(self, name, attributes):
|
|
|
self.name = name
|
|
@@ -55,21 +73,8 @@ class Element:
|
|
|
def __eq__(self, element):
|
|
|
if not hasattr(element, 'name') or self.name != element.name:
|
|
|
return False
|
|
|
- if len(self.attributes) != len(element.attributes):
|
|
|
- return False
|
|
|
if self.attributes != element.attributes:
|
|
|
- # attributes without a value is same as attribute with value that
|
|
|
- # equals the attributes name:
|
|
|
- # <input checked> == <input checked="checked">
|
|
|
- for i in range(len(self.attributes)):
|
|
|
- attr, value = self.attributes[i]
|
|
|
- other_attr, other_value = element.attributes[i]
|
|
|
- if not value:
|
|
|
- value = attr
|
|
|
- if not other_value:
|
|
|
- other_value = other_attr
|
|
|
- if attr != other_attr or value != other_value:
|
|
|
- return False
|
|
|
+ return False
|
|
|
return self.children == element.children
|
|
|
|
|
|
def __hash__(self):
|
|
@@ -194,14 +199,7 @@ class Parser(HTMLParser):
|
|
|
self.handle_endtag(tag)
|
|
|
|
|
|
def handle_starttag(self, tag, attrs):
|
|
|
- # Special case handling of 'class' attribute, so that comparisons of DOM
|
|
|
- # instances are not sensitive to ordering of classes.
|
|
|
- attrs = [
|
|
|
- (name, ' '.join(sorted(value for value in ASCII_WHITESPACE.split(value) if value)))
|
|
|
- if name == "class"
|
|
|
- else (name, value)
|
|
|
- for name, value in attrs
|
|
|
- ]
|
|
|
+ attrs = normalize_attributes(attrs)
|
|
|
element = Element(tag, attrs)
|
|
|
self.current.append(element)
|
|
|
if tag not in self.SELF_CLOSING_TAGS:
|