ValueError Traceback (most recent call last)
<ipython-input-8-45ad24ffcda1> in <module>()
9 try:
10 with open(fn, 'rb') as f:
---> 11 annotated = ner.annotate(f.read())
12
13 path, filename = os.path.split(fn)
/Users/kmike/svn/webstruct/webstruct/model.pyc in annotate(self, bytes_data, pretty_print)
105 html_tokens, tags = self.extract_raw(bytes_data)
106 tree = self.html_tokenizer.detokenize_single(html_tokens, tags)
--> 107 tree = to_webannotator(tree, self.entity_colors)
108 return tostring(tree, pretty_print=pretty_print)
109
/Users/kmike/svn/webstruct/webstruct/webannotator.py in to_webannotator(tree, entity_colors)
258 """
259 handler = _WaContentHandler(entity_colors)
--> 260 lxml.sax.saxify(tree, handler)
261 tree = handler.out.etree
262 _copy_title(tree)
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in saxify(element_or_tree, content_handler)
245 them against a SAX ContentHandler.
246 """
--> 247 return ElementTreeProducer(element_or_tree, content_handler).saxify()
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in saxify(self)
178 self._recursive_saxify(sibling, {})
179
--> 180 self._recursive_saxify(element, {})
181
182 if hasattr(element, 'getnext'):
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
224 content_handler.characters(element.text)
225 for child in element:
--> 226 self._recursive_saxify(child, prefixes)
227 content_handler.endElementNS((ns_uri, local_name), qname)
228 for prefix, uri in new_prefixes:
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in _recursive_saxify(self, element, prefixes)
220 content_handler.startPrefixMapping(prefix, uri)
221 content_handler.startElementNS((ns_uri, local_name),
--> 222 qname, sax_attributes)
223 if element.text:
224 content_handler.characters(element.text)
/Users/kmike/svn/webstruct/webstruct/webannotator.py in startElementNS(self, name, qname, attributes)
122 self._closeSpan()
123 # print('start %s' % qname)
--> 124 self.out.startElementNS(name, qname, attributes)
125 self._openSpan()
126
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/sax.pyc in startElementNS(self, ns_name, qname, attributes)
110 else:
111 element = SubElement(element_stack[-1], el_name,
--> 112 attrs, self._new_mappings)
113 element_stack.append(element)
114
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/etree.so in lxml.etree.SubElement (src/lxml/lxml.etree.c:67070)()
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._makeSubElement (src/lxml/lxml.etree.c:15492)()
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._makeSubElement (src/lxml/lxml.etree.c:15423)()
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._initNodeAttributes (src/lxml/lxml.etree.c:16529)()
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._addAttributeToNode (src/lxml/lxml.etree.c:16701)()
/Users/kmike/envs/scraping/lib/python2.7/site-packages/lxml/etree.so in lxml.etree._utf8 (src/lxml/lxml.etree.c:26485)()
ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters