Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 99b8bd6

Browse files
committed
add htmlparser sample
1 parent ad0786f commit 99b8bd6

File tree

1 file changed

+36
-0
lines changed

1 file changed

+36
-0
lines changed

py3/commonlib/use_htmlparser.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
from html.parser import HTMLParser
5+
from html.entities import name2codepoint
6+
7+
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that echoes every parse event to stdout.

    Each handler prints a textual rendering of the event it receives:
    start tags, end tags, self-closing tags, text data, comments, named
    entity references and numeric character references.  Tag attributes
    are received but not printed.
    """

    def __init__(self, *, convert_charrefs=False, **kwargs):
        """Create the parser with reference handlers enabled by default.

        Since Python 3.5 the base class defaults to
        ``convert_charrefs=True``, which folds ``&name;`` / ``&#NN;`` into
        ``handle_data`` and never calls ``handle_entityref`` or
        ``handle_charref``.  Defaulting to ``False`` here keeps those two
        handlers reachable.

        Args:
            convert_charrefs: Forwarded to ``HTMLParser``; pass ``True`` to
                get the stock behaviour (references decoded into data).
            **kwargs: Any further keyword options accepted by the base
                class, forwarded unchanged.
        """
        super().__init__(convert_charrefs=convert_charrefs, **kwargs)

    def handle_starttag(self, tag, attrs):
        """Print an opening tag as ``<tag>`` (attributes are ignored)."""
        print('<%s>' % tag)

    def handle_endtag(self, tag):
        """Print a closing tag as ``</tag>``."""
        print('</%s>' % tag)

    def handle_startendtag(self, tag, attrs):
        """Print a self-closing tag as ``<tag/>`` (attributes are ignored)."""
        print('<%s/>' % tag)

    def handle_data(self, data):
        """Print a run of text data unchanged."""
        print(data)

    def handle_comment(self, data):
        """Print a comment; print() separates the delimiters with spaces."""
        print('<!--', data, '-->')

    def handle_entityref(self, name):
        """Print a named entity reference as ``&name;``."""
        print('&%s;' % name)

    def handle_charref(self, name):
        """Print a numeric character reference as ``&#name;``."""
        print('&#%s;' % name)
29+
30+
# Demo: run a small HTML document through the parser so that each handler
# prints the events it receives.
parser = MyHTMLParser()
parser.feed('''<html>
<head></head>
<body>
<!-- test html parser -->
<p>Some <a href=\"#\">html</a> HTML&nbsp;tutorial...<br>END</p>
</body></html>''')
# close() forces processing of any data still buffered by feed(), so no
# trailing text is lost when the input does not end on a tag.
parser.close()

0 commit comments

Comments
 (0)