Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d505f11

Browse files
committed
Updating to deal with changes in the Gutenberg format
1 parent 2a13b7e commit d505f11

File tree

7 files changed

+16649
-17085
lines changed

7 files changed

+16649
-17085
lines changed

code/158-0.txt

Lines changed: 16633 additions & 0 deletions
Large diffs are not rendered by default.

code/analyze_book1.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,9 @@ def process_file(filename, skip_header):
2929
skip_gutenberg_header(fp)
3030

3131
for line in fp:
32+
if line.startswith('*** END OF THIS'):
33+
break
34+
3235
process_line(line, hist)
3336

3437
return hist
@@ -40,7 +43,7 @@ def skip_gutenberg_header(fp):
4043
fp: open file object
4144
"""
4245
for line in fp:
43-
if line.startswith('*** START OF TH'):
46+
if line.startswith('*** START OF THIS'):
4447
break
4548

4649

@@ -132,7 +135,7 @@ def random_word(hist):
132135

133136

134137
def main():
135-
hist = process_file('emma.txt', skip_header=True)
138+
hist = process_file('158-0.txt', skip_header=True)
136139
print('Total number of words:', total_words(hist))
137140
print('Number of different words:', different_words(hist))
138141

code/analyze_book2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def subtract(d1, d2):
2323

2424

2525
def main():
26-
hist = process_file('emma.txt', skip_header=True)
26+
hist = process_file('158-0.txt', skip_header=True)
2727
words = process_file('words.txt', skip_header=False)
2828

2929
diff = subtract(hist, words)

code/analyze_book3.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def random_word(hist):
4545

4646

4747
def main():
48-
hist = process_file('emma.txt', skip_header=True)
48+
hist = process_file('158-0.txt', skip_header=True)
4949

5050
print("\n\nHere are some random words from the book")
5151
for i in range(100):

0 commit comments

Comments (0)