Convert text prediction script [Markov chain] from javascript to python

I have been trying for the last couple of days to convert this js script to python code.

My implementation (blind cp basically, some minor fixes here and there):

import random
class markov:
    """Markov-chain text model, transliterated from a JavaScript original.

    NOTE(review): this is a near-literal port and is not working Python;
    the comments below mark the statements that fail or have no effect.
    """
    # Transition table. Defined on the class, so every instance that never
    # rebinds it shares this one dict.
    memory = {}
    # Delimiter used to split input text and to join generated output.
    separator = ' '
    # Number of preceding tokens that make up one state.
    order = 2

    def getInitial(self):
        """Return the start state: a list holding `order` empty strings."""
        ret = []
        for i in range(0, self.order, 1):
            ret.append('')
        return ret

    def breakText(self, txt, cb):
        """Split `txt` on the separator and report (state, token) pairs to `cb`.

        As written, only the final ``cb(prev, '')`` call runs: the nested
        ``step`` helper is defined but never invoked.
        """
        parts = txt.split(self.separator)
        prev = self.getInitial()
        def step(self):
            cb(prev, self.next)
            # Python lists have no shift(); leftover from the JavaScript code.
            prev.shift()#Javascript function.
            prev.append(self.next)
        # The JavaScript iteration below is disabled and has no Python
        # replacement here, so `parts` is never walked.
        #parts.forEach(step) # - step is the function above.
        # `prev` is still the list returned by getInitial() at this point.
        cb(prev, '')

    def learn(self, txt):
        """Feed `txt` through breakText, storing each reported pair in `memory`."""
        mem = self.memory
        def learnPart(key, value):
            # `key` is the list built by getInitial(); lists are unhashable,
            # so using it as a dict key raises TypeError. Even with a
            # hashable key, indexing a plain dict with a key that is not
            # present raises KeyError.
            if not mem[key]:
                mem[key] = []
            # Rebinds the entry to `value` rather than appending to the list.
            mem[key] = value
            return mem
        self.breakText(txt, learnPart)

    def step(self, state, ret):
        """Recursively extend `ret` with tokens picked at random from `memory`."""
        # A missing key raises KeyError on a plain dict, so the `or ['']`
        # fallback only applies when the stored value is falsy.
        nextAvailable = self.memory[state] or ['']
        # Neither the values stored by learn() nor the `['']` fallback have a
        # keys() method.
        self.next = nextAvailable[random.choice(nextAvailable.keys())]
        if not self.next:
            return ret
        # Appends the builtin function `next`, not `self.next`.
        ret.append(next)
        # slice() is a JavaScript method; Python sequences do not provide it.
        nextState = state.slice(1)
        return self.step(nextState, ret)

    def ask(self, seed):
        """Generate text continuing from `seed`, or from the start state if falsy."""
        if not seed:
            # This class defines getInitial, not genInitial.
            seed = self.genInitial()
        # step() returns the list `ret`; lists have no join() method.
        seed = seed + self.step(seed, []).join(self.separator)
        return seed

Questions:

  • I have absolutely no knowledge of javascript.

  • When I try to "learn" some text with an object of the "markov" class [for example: a = markov(); a.learn("sdfg")], I get the following error: "TypeError: unhashable type: 'list'", raised for the dictionary "mem" in the function "learnPart", which is nested inside the method "learn".

    , , [TypeError , ( hashable)] ?

, , , : D

+5
3

, . , ! Python, , . , Markov 2, , , , order-N .

js - ( ). Python . :

First, add from collections import defaultdict and change markov.memory to:

memory = defaultdict(list)

Then change markov.getInitial to return a tuple (remember, this version is hard-coded to order 2):

def getInitial(self):
    """Return the start-of-text state: a pair of empty strings."""
    return ('',) * 2

( , Python: tuple([''] * 2) . None)

, genInitial.

, js (), Python, yield (. ).

Python - for. , (, yield). , breakText:

def breakText(self, txt):
    """Yield (state, next_word) pairs for every word in `txt`.

    `state` is the tuple of the two preceding words, starting from the
    value returned by `getInitial()`. After the last word one extra pair
    is yielded whose next word is '' to mark the end of the sentence.

    Empty tokens (produced by consecutive separators) are skipped: an
    empty next word means "end of sentence", so letting one through would
    cut the learned sentence short.
    """
    #our very own (ε,ε)
    prev = self.getInitial()

    for word in txt.split(self.separator):
        if not word:
            continue
        yield prev, word
        # slide the two-word window forward by one word
        prev = (prev[1], word)

    #end-of-sentence, prev->ε
    yield prev, ''

, prev = (prev[1], word) :

>>> a = (0, 1)
>>> a
(0, 1)
>>> a = (a[1], 2)
>>> a
(1, 2)

, . , breakText, markov.learn:

def learn(self, txt):
    """Record every (state, next_word) pair of `txt` in `self.memory`."""
    for state, follower in self.breakText(txt):
        self.memory[state].append(follower)

defaultdict, , .

, , , ! :

from collections import defaultdict

class Markov:
    """Order-2 Markov chain over separator-delimited words (learning half).

    `memory` maps a state, the tuple of the two preceding words, to the
    list of words seen following that state. An empty string stands for
    "no word": it pads the start state and marks the end of a sentence.
    """
    separator = ' '

    def __init__(self):
        # One table per instance; a class-level defaultdict would be shared
        # by every Markov object.
        self.memory = defaultdict(list)

    def learn(self, txt):
        """Record every (state, next_word) pair of `txt` in `self.memory`."""
        for key, value in self.breakText(txt):
            self.memory[key].append(value)

    def breakText(self, txt):
        """Yield (state, next_word) pairs for `txt`, ending with (state, '')."""
        #our very own (ε,ε)
        prev = self.getInitial()

        for word in txt.split(self.separator):
            # Consecutive separators produce empty tokens; an empty next
            # word means end-of-sentence, so skip them.
            if not word:
                continue
            yield prev, word
            prev = (prev[1], word)

        #end-of-sentence, prev->ε
        yield (prev, '')

    def getInitial(self):
        """Return the start-of-text state: a pair of empty strings."""
        return ('', '')

(I renamed the class from markov to Markov, since class names are conventionally capitalized.) Save this as brain.py and start Python.

>>> import brain
>>> bob = brain.Markov()
>>> bob.learn('Mary had a little lamb')
>>> bob.memory
defaultdict(<class 'list'>, {('had', 'a'): ['little'], ('Mary', 'had'): ['a'], ('', ''): ['Mary'], ('little', 'lamb'): [''], ('a', 'little'): ['lamb'], ('', 'Mary'): ['had']})

! , , :

{ ('', ''): ['Mary'],
  ('', 'Mary'): ['had'],
  ('Mary', 'had'): ['a'],
  ('a', 'little'): ['lamb'],
  ('had', 'a'): ['little'],
  ('little', 'lamb'): ['']}

zips up ? !

step

, step. defaultdict, random.choice , , . ( ), , ( - ).

def step(self, state):
    """Pick a random follower of `state` and advance the chain by one word.

    Returns a `(word, next_state)` tuple, or None when the chain ends,
    either because the end-of-sentence marker '' was drawn or because
    `state` has never been seen.
    """
    # .get() instead of [] so that merely asking about an unseen state does
    # not make the defaultdict insert an empty entry for it. The `or ['']`
    # fallback keeps random.choice from being handed an empty sequence.
    choice = random.choice(self.memory.get(state) or [''])

    if not choice:
        return None

    nextState = (state[1], choice)
    return choice, nextState

or [''], random.choice . , ask ( ):

def ask(self, seed=False):
    """Generate text by walking the chain from `seed` until it ends.

    `seed` is the state to start from; when it is falsy, the start-of-text
    state from `getInitial()` is used. The generated words, not including
    the seed itself, are returned joined by the separator.
    """
    state = seed if seed else self.getInitial()
    words = []

    link = self.step(state)
    while link is not None:
        word, state = link
        words.append(word)
        link = self.step(state)

    return self.separator.join(words)

, yucky. step , , , , ! !

bob:

from collections import defaultdict
import random

class Markov:
    """Order-2 Markov chain text generator over separator-delimited words.

    `memory` maps a state, the tuple of the two preceding words, to the
    list of words seen following that state. An empty string stands for
    "no word": it pads the start state and marks the end of a sentence.
    """
    separator = ' '

    def __init__(self):
        # One table per instance; a class-level defaultdict would be shared
        # by every Markov object.
        self.memory = defaultdict(list)

    def learn(self, txt):
        """Record every (state, next_word) pair of `txt` in `self.memory`."""
        for key, value in self.breakText(txt):
            self.memory[key].append(value)

    def ask(self, seed=False):
        """Generate text by walking the chain from `seed` until it ends.

        `seed` is the state to start from; when it is falsy, the
        start-of-text state is used. The generated words, not including the
        seed itself, are returned joined by the separator.
        """
        ret = []

        if not seed:
            seed = self.getInitial()

        while True:
            link = self.step(seed)

            if link is None:
                break

            ret.append(link[0])
            seed = link[1]

        return self.separator.join(ret)

    def breakText(self, txt):
        """Yield (state, next_word) pairs for `txt`, ending with (state, '')."""
        #our very own (ε,ε)
        prev = self.getInitial()

        for word in txt.split(self.separator):
            # Consecutive separators produce empty tokens; step() treats an
            # empty word as end-of-sentence, so skip them.
            if not word:
                continue
            yield prev, word
            prev = (prev[1], word)

        #end-of-sentence, prev->ε
        yield (prev, '')

    def step(self, state):
        """Return `(word, next_state)` for a random follower, or None at the end."""
        # .get() instead of [] so that asking about an unseen state does not
        # make the defaultdict insert an empty entry for it. The `or ['']`
        # fallback keeps random.choice from being handed an empty sequence.
        choice = random.choice(self.memory.get(state) or [''])

        if not choice:
            return None

        nextState = (state[1], choice)
        return choice, nextState

    def getInitial(self):
        """Return the start-of-text state: a pair of empty strings."""
        return ('', '')

:

>>> import brain
>>> bob = brain.Markov()
>>> bob.learn('Mary had a little lamb')
>>> bob.ask()
'Mary had a little lamb'
>>> bob.learn('Mary had a giant crab')
>>> bob.ask(('Mary', 'had'))
'a giant crab'

, , . , .

, - 4 .

+15

, learnPart getInitial, , . , , , , .

learnPart:

def learnPart(key, value):
    """Record `value` as one more follower of the state `key` in `mem`.

    `mem` is the dict captured from the enclosing `learn` method. Returns
    `mem` itself.
    """
    # Lists are unhashable and cannot be dict keys; a tuple can.
    key = tuple(key) #<-----Try adding this line
    # setdefault creates the follower list on first sight of `key` (a plain
    # `mem[key]` lookup would raise KeyError), and append() keeps the
    # earlier followers instead of overwriting them.
    mem.setdefault(key, []).append(value)
    return mem

, .

Also, there are plenty of existing Markov chain implementations in Python. A GitHub search returns 168 results: https://github.com/search?l=Python&q=markov+chain

+1

I made a simplified version of the code:

import re
class Brain():
    """Toy "predictor": remembers one text and returns what follows a phrase."""
    # The learned text; empty until learn() is called.
    H = ''

    def learn(self, txt):
        """Remember `txt`, replacing anything learned before."""
        self.H = txt

    def ask(self, ask):
        """Return the learned text that follows the first occurrence of `ask`.

        Matching is case-insensitive and `ask` is treated as literal text,
        not as a regular expression. Surrounding whitespace is stripped
        from the result. Returns '' when `ask` does not occur in the
        learned text.
        """
        # re.escape keeps characters such as '(' or '.' in the phrase from
        # being interpreted as regex syntax.
        pattern = re.compile(r"%s(.*)" % re.escape(ask), re.I | re.DOTALL)
        m = pattern.search(self.H)
        if m is None:
            return ''
        return m.group(1).strip()

Here's the execution:

>>> import brain
>>> bob = brain.Brain()
>>> bob.learn('Mary had a little lamb' )
>>> bob.ask('Mary had')
'a little lamb'

I agree that this is not exactly a Markov chain algorithm. But it has several advantages:

I. You can give ask() a seed phrase, as shown above.

II. It is only a few lines of code.

III. It is hopefully easier to understand.

+1
source

All Articles