|
| 1 | +#! /usr/local/bin/python |
| 2 | + |
class Markov:
    """Markov chain over sliceable sequences (strings or tuples).

    Transitions are stored in ``self.trans``, a dictionary mapping a state
    to the list of successors observed after it.  A state is either None
    (the start of a sequence) or a slice holding at most ``histsize``
    trailing items.  A successor is a one-item slice, or None to mark the
    end of a sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty chain.

        histsize -- maximum number of trailing items used as the state.
        choice   -- callable that receives a list of options and returns
                    one of them (for example ``random.choice``).
        """
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that `next` was observed following `state`."""
        # The parameter keeps its historical name `next` for callers.
        if state in self.trans:
            self.trans[state].append(next)
        else:
            self.trans[state] = [next]

    def put(self, seq):
        """Feed one complete sequence into the transition table."""
        n = self.histsize
        add = self.add
        # An empty slice of the same type as seq is the seed get() starts
        # from, so generated output has the same type as the input.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i-n):i], seq[i:i+1])
        # Clamp the start index: a negative start would keep only part of
        # a sequence shorter than histsize, while get() looks up the whole
        # sequence in that case and would fail with KeyError.
        add(seq[max(0, len(seq)-n):], None)

    def get(self):
        """Generate and return one sequence by walking the table.

        Raises KeyError if no sequence has been fed with put() yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq)-n):]
            successor = choice(trans[state])
            # A false successor (normally None) marks the end.
            if not successor:
                break
            seq = seq + successor
        return seq
| 32 | + |
def test():
    """Command-line driver for the Markov chain demo.

    Parses the options from sys.argv (a single digit sets the history
    size; -c selects characters, -w words; -d increases and -q silences
    debugging output), feeds every paragraph of the input files (default
    stdin) into a Markov instance, then prints generated paragraphs
    forever, wrapped at 72 characters.

    Exits with status 2 after printing the usage text when the options
    cannot be parsed.
    """
    import sys
    import random
    import getopt

    # sys.stdout.write keeps this function independent of print syntax.
    write = sys.stdout.write

    args = sys.argv[1:]
    try:
        # 'q' must be listed here, otherwise the documented -q is rejected.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        usage = (
            'Usage: markov [-#] [-cddqw] [file] ...',
            'Options:',
            '-#: 1-digit history size (default 2)',
            '-c: characters (default)',
            '-w: words',
            '-d: more debugging output',
            '-q: no debugging output',
            'Input files (default stdin) are split in paragraphs',
            'separated blank lines and each paragraph is split',
            'in words by whitespace, then reconcatenated with',
            'exactly one space separating words.',
            'Output consists of paragraphs separated by blank',
            'lines, where lines are no longer than 72 characters.',
        )
        for line in usage:
            write(line + '\n')
        # opts is unbound at this point, so stop instead of falling through.
        sys.exit(2)

    histsize = 2
    do_words = 0
    debug = 1
    for o, a in opts:
        # int() instead of eval(): the option text comes from the user.
        if '-0' <= o <= '-9': histsize = int(o[1:])
        if o == '-c': do_words = 0
        if o == '-d': debug = debug + 1
        if o == '-q': debug = 0
        if o == '-w': do_words = 1
    if not args: args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    write('Sorry, need stdin from file\n')
                    continue
            else:
                f = open(filename, 'r')
            if debug: write('processing %s ...\n' % filename)
            try:
                text = f.read()
            finally:
                # Close the file even when reading fails.
                f.close()
            for para in text.split('\n\n'):
                if debug > 1: write('feeding ...\n')
                words = para.split()
                if words:
                    if do_words: data = tuple(words)
                    else: data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        write('Interrupted -- continue with data read so far\n')
    if not m.trans:
        write('No valid input files\n')
        return
    if debug: write('done.\n')

    if debug > 1:
        # Dump the start state and every state shorter than the history
        # size.  The empty state is always included, so history size 0
        # works in word mode too, where the empty state is () and not ''.
        for key in m.trans.keys():
            if key is None or len(key) < histsize or len(key) == 0:
                write('%s %s\n' % (repr(key), m.trans[key]))
        write('\n')

    limit = 72
    while True:
        data = m.get()
        if do_words: words = data
        else: words = data.split()
        line = []
        n = 0
        for w in words:
            if n + len(w) > limit:
                write(' '.join(line) + '\n')
                line = []
                n = 0
            line.append(w)
            n = n + len(w) + 1
        # Finish the last line and add the blank paragraph separator.
        write(' '.join(line) + '\n\n')
| 109 | + |
def tuple(list):
    """Return a tuple holding the items of the sequence `list`, in order.

    The sequence is split in half recursively and the partial tuples are
    concatenated.  This module-level definition shadows the builtin of
    the same name; the name and parameter are kept for existing callers.
    """
    if len(list) == 0: return ()
    if len(list) == 1: return (list[0],)
    # Floor division: under Python 3, "/" would give a float, which is
    # not a valid slice index.
    i = len(list) // 2
    return tuple(list[:i]) + tuple(list[i:])
| 115 | + |
# Run the demo only when executed as a script: test() never returns once it
# starts generating output, so it must not run on import.
if __name__ == '__main__':
    test()
0 commit comments