Initial revision

gvanrossum · gvanrossum · commit 6930b3d18da1 · 1993-12-14T10:08:02.000Z
diff --git a/Demo/scripts/markov.py b/Demo/scripts/markov.py
@@ -0,0 +1,116 @@
+#! /usr/local/bin/python
+
+class Markov:
+	def __init__(self, histsize, choice):
+		self.histsize = histsize
+		self.choice = choice
+		self.trans = {}
+	def add(self, state, next):
+		if not self.trans.has_key(state):
+			self.trans[state] = [next]
+		else:
+			self.trans[state].append(next)
+	def put(self, seq):
+		n = self.histsize
+		add = self.add
+		add(None, seq[:0])
+		for i in range(len(seq)):
+			add(seq[max(0, i-n):i], seq[i:i+1])
+		add(seq[len(seq)-n:], None)
+	def get(self):
+		choice = self.choice
+		trans = self.trans
+		n = self.histsize
+		seq = choice(trans[None])
+		while 1:
+			subseq = seq[max(0, len(seq)-n):]
+			options = trans[subseq]
+			next = choice(options)
+			if not next: break
+			seq = seq + next
+		return seq
+
+def test():
+	import sys, string, whrandom, getopt
+	args = sys.argv[1:]
+	try:
+		opts, args = getopt.getopt(args, '0123456789cdw')
+	except getopt.error:
+		print 'Usage: markov [-#] [-cddqw] [file] ...'
+		print 'Options:'
+		print '-#: 1-digit history size (default 2)'
+		print '-c: characters (default)'
+		print '-w: words'
+		print '-d: more debugging output'
+		print '-q: no debugging output'
+		print 'Input files (default stdin) are split in paragraphs'
+		print 'separated blank lines and each paragraph is split'
+		print 'in words by whitespace, then reconcatenated with'
+		print 'exactly one space separating words.'
+		print 'Output consists of paragraphs separated by blank'
+		print 'lines, where lines are no longer than 72 characters.'
+	histsize = 2
+	do_words = 0
+	debug = 1
+	for o, a in opts:
+		if '-0' <= o <= '-9': histsize = eval(o[1:])
+		if o == '-c': do_words = 0
+		if o == '-d': debug = debug + 1
+		if o == '-q': debug = 0
+		if o == '-w': do_words = 1
+	if not args: args = ['-']
+	m = Markov(histsize, whrandom.choice)
+	try:
+	    for filename in args:
+		    if filename == '-':
+			    f = sys.stdin
+			    if f.isatty():
+				    print 'Sorry, need stdin from file'
+				    continue
+		    else:
+			    f = open(filename, 'r')
+		    if debug: print 'processing', filename, '...'
+		    text = f.read()
+		    f.close()
+		    paralist = string.splitfields(text, '\n\n')
+		    for para in paralist:
+			    if debug > 1: print 'feeding ...'
+			    words = string.split(para)
+			    if words:
+				    if do_words: data = tuple(words)
+				    else: data = string.joinfields(words, ' ')
+				    m.put(data)
+	except KeyboardInterrupt:
+		print 'Interrupted -- continue with data read so far'
+	if not m.trans:
+		print 'No valid input files'
+		return
+	if debug: print 'done.'
+	if debug > 1:
+		for key in m.trans.keys():
+			if key is None or len(key) < histsize:
+				print `key`, m.trans[key]
+		if histsize == 0: print `''`, m.trans['']
+		print
+	while 1:
+		data = m.get()
+		if do_words: words = data
+		else: words = string.split(data)
+		n = 0
+		limit = 72
+		for w in words:
+			if n + len(w) > limit:
+				print
+				n = 0
+			print w,
+			n = n + len(w) + 1
+		print
+		print
+
+def tuple(list):
+	if len(list) == 0: return ()
+	if len(list) == 1: return (list[0],)
+	i = len(list)/2
+	return tuple(list[:i]) + tuple(list[i:])
+
+test()