feature: recite with zuohaitao.com/word.html

codepongo · codepongo · commit 1b7b84beeb14 · 2017-07-10T10:43:01.000+08:00
diff --git a/README.md b/README.md
@@ -3,7 +3,8 @@ utocode
 
 it is only a storage some demos and documents are in it.
 
-
+* cs - C# demo
+  + zlib for .NET 
 * recite - recite English words, python script in windows.
 * windows - the source can be used only in the windows os.
 	+ crrunner chrome launcher to avoid to be rising hijack.
diff --git a/recite/install.py b/recite/install.py
@@ -31,6 +31,7 @@ def copytree_f(s, d):
             'r.bat',
             'review.bat',
             's.bat',
+            'zuohaitao_com_word.py',
     ]
     for e in excutes:
         shutil.copy(e, out)
diff --git a/recite/recite.py b/recite/recite.py
@@ -7,18 +7,26 @@
 import sound
 import datetime
 import phonetic_with_bing as phonetic
-
+import zuohaitao_com_word
+book = 'zuohaitao.com/word.html'
 def learning(unknowV):
+    def all():
+        for ww in unknowV:
+            print ww['word'],
+        print ''
     print ''
     print len(unknowV)
-    for ww in unknowV:
-        print ww['word'],
-    print ''
+    all()
     while True:
         i = raw_input('>')
-        if i == ',quit':
+        if i == '':
+            continue
+        if i == ',quit'or i == ',q':
             break
-        elif i[0] == ',':
+        if i == ',l':
+            all()
+            continue
+        if i[0] == ',':
             for w in unknowV:
                 if w['word'] == i[1:]:
                     sound.sound(i[1:])
@@ -28,21 +36,38 @@ def learning(unknowV):
     print ''
 
 def words(folder='vocabulary'):
+    global book  # rebinds the module-level book name below
+    if -1 != book.find('zuohaitao.com/word.html'):  # book still names the remote word list
+        book = zuohaitao_com_word.fetch()  # fetch() returns the local book directory name
+    folder = os.path.join(book, folder)  # vocabulary files are read from inside the book directory
+    sound.sound_dir = os.path.join(book, 'sound')  # point the sound module at the book's sound directory
     vocabulary = []
     for f in os.listdir(folder):
         if os.path.splitext(f)[1] != '.txt':
             continue
         with open(os.path.join(folder, f) , 'rb') as f:
             for line in f.readlines():
-                word, t = line.split('\t ')
+                if line == '\r\n':  # skip lines that contain only a CRLF
+                    continue
+                word = ''  # stays empty when the split below fails
+                t = ''
+                try:
+                    word, t = line.split('\t ')  # expects exactly one '\t ' separator per line
+                except:  # malformed line: report it and carry on with empty word/t
+                    print f  # NOTE(review): f is the open file object here ('with ... as f' rebinds the loop variable), not the file name
+                    print line
                 w = {}
                 w['word'] = word
                 w['translation'] = t
                 sep = t.find('/ ') 
                 if sep != -1:
                     w['phonetic'] = '/' + t[0:sep].replace('/', '') + '/'
-                    w['translation'] = t[sep+len('/ '):]
-                vocabulary.append(w)
+                    try:
+                        w['translation'] = t[sep+len('/ '):].decode('utf8')  # text after the phonetic part, decoded from UTF-8
+                    except:  # decoding failed: keep the undecoded text
+                        w['translation'] = t[sep+len('/ '):]
+                if w['word'] != '':  # drop entries whose line could not be split
+                    vocabulary.append(w)
     return vocabulary
 def exit():
     print '\ndo you want to exit? (Y)es/(n)o'
@@ -121,12 +146,11 @@ def loop(v, save=True):
 if __name__ == '__main__':
     start = 0
     save = False
-    if len(sys.argv) >= 1:
+    if len(sys.argv) > 1:  # argv[0] is the script name, so an argument was passed only when len > 1
         try:
             start = int(sys.argv[1])
         except:
             start = 0
         save = True
-
     v = words()[start:]
     loop(v, save)
diff --git a/recite/zuohaitao_com_word.py b/recite/zuohaitao_com_word.py
@@ -0,0 +1,48 @@
+import urllib
+import os
+import datetime
+import HTMLParser
+book = 'zuohaitao.com_word'
+def fetch():  # make sure today's vocabulary file exists under `book`, then return the book directory name
+    if not os.path.exists(book):
+        os.mkdir(book)
+    vocabulary = os.path.join(book, 'vocabulary')
+    if not os.path.exists(vocabulary):
+        os.mkdir(vocabulary)
+    vocabulary_txt = os.path.join(vocabulary, str(datetime.date.today())) + '.txt'  # one vocabulary file per calendar day
+    if os.path.isfile(vocabulary_txt):  # today's file already exists: nothing to download or parse
+        return book
+    sound = os.path.join(book, 'sound')
+    if not os.path.exists(sound):
+        os.mkdir(sound)
+    url = 'http://zuohaitao.com/word.html'
+    word_html = os.path.join(book, str(datetime.date.today())) + '.html'  # dated local copy of the downloaded page
+    response = ''
+    if not os.path.isfile(word_html):  # download only when today's copy is not on disk yet
+        response = urllib.urlopen(url).read()
+        with open(word_html, 'wb') as f:
+            f.write(response);
+    else:  # reuse the copy saved earlier today
+        with open(word_html, 'rb') as f:
+            response = f.read()
+    class Parser(HTMLParser.HTMLParser):  # collects the text found after a <pre> whose style contains 'display:none'
+        def __init__(self):
+            HTMLParser.HTMLParser.__init__(self)
+            self.flag = False  # True while text should be collected
+            self.data = ''  # accumulated text
+        def handle_starttag(self, tag, attrs):
+            if tag == 'pre':
+                for key, value in attrs:
+                    if key == 'style' and value.find('display:none') != -1:
+                        self.flag = True
+                        break
+        def handle_endtag(self, tag):
+                self.flag = False  # any closing tag, not only </pre>, stops collection
+        def handle_data(self, data):
+            if self.flag:
+                self.data += data
+    p = Parser()
+    p.feed(response)
+    with open(vocabulary_txt, 'wb') as f:
+        f.write(p.data.replace(' /', '\t /'))  # insert a tab before each ' /'; words() in recite.py splits lines on '\t '
+    return book

Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,7 @@ def copytree_f(s, d):`
`31`	`31`	`'r.bat',`
`32`	`32`	`'review.bat',`
`33`	`33`	`'s.bat',`
	`34`	`+ 'zuohaitao_com_word.py',`
`34`	`35`	`]`
`35`	`36`	`for e in excutes:`
`36`	`37`	`shutil.copy(e, out)`