Lisp-like expression compiler and Virtual Machine

Sunday 9th of May 2010 05:46:41 PM


  Toggle Advanced Options



Modules

  • Virtual Machine
  • Compiler
  • Scanner
  • Tree (Abstract Syntax Tree - AST)
  • Node

Example usage

vm = VirtualMachine()
expression = '(+ 130 (- 100 (/ (* 2 25) (- 10 8)) -25 7 (+ 10 10) (/ 30 10)))'
compiler = Compiler.Compiler(expression)
compiler.scan()
compiler.buildTree()
compiler.generate()
vm.load(compiler.byteCode)    
vm.execute(compiler.byteCode)
vm.cpu.dumpRegisters()
vm.dumpMemory()

Example output

Translator.out

(+ 130 (- 100 (/ (* 2 25) (- 10 8)) -25 7 (+ 10 10) (/ 30 10)))
--------------------------------------------------------------------------------
>   0: ['+', '130']
    >   1: ['-', '100 -25 7']
        >   2: ['/']
            >   5: ['*', '2 25']
            >   6: ['-', '10 8']
        >   3: ['+', '10 10']
        >   4: ['/', '30 10']
--------------------------------------------------------------------------------
[[200], [70], [25, 20, 3], [50, 2]]
--------------------------------------------------------------------------------
200
--------------------------------------------------------------------------------
[4, '-', '10 8']
[4, '*', '2 25']
[3, '/', '30 10']
[3, '+', '10 10']
[3, '/', '']
[2, '-', '100 -25 7']
[1, '+', '130']

Compiler.out

(+ 130 (- 100 (/ (* 2 25) (- 10 8)) -25 7 (+ 10 10) (/ 30 10)))
--------------------------------------------------------------------------------
[1, '+', '130']
[2, '-', '100 -25 7']
[3, '/', '']
[3, '+', '10 10']
[3, '/', '30 10']
[4, '*', '2 25']
[4, '-', '10 8']
--------------------------------------------------------------------------------
>   0: ['+', '130']
    >   1: ['-', '100 -25 7']
        >   2: ['/']
            >   5: ['*', '2 25']
            >   6: ['-', '10 8']
        >   3: ['+', '10 10']
        >   4: ['/', '30 10']
--------------------------------------------------------------------------------
['PUSH', 
10, 
'PUSH', 
30, 
'DIV', 
'PUSH', 
10, 
'PUSH', 
10, 
'ADD', 
'PUSH', 
8, 
'PUSH', 
10, 
'SUB', 
'PUSH', 
25, 
'PUSH', 
2, 
'MUL', 
'DIV', 
'PUSH', 
7, 
'PUSH', 
-25, 
'PUSH', 
100, 
'SUB', 
'SUB', 
'SUB', 
'SUB', 
'SUB', 
'PUSH', 
130, 
'ADD']

VirtualMachine.out

Click on the following link to see a screen shot of the Virtual Machine (VM) memory dump.

Source code

Node.py:

# Copyright (c) 2010, Brett Alistair Kromkamp - brettkromkamp@gmail.com
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
# Redistributions of source code must retain the above copyright notice, this list
# of conditions and the following disclaimer.
# 
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# 
# Neither the name of the copyright holder nor the names of the contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Class that implements a node structure

import pickle

# module constants

#--------------------------------------------------------------------------------

class BaseNode(object):  # python 2.2. new-style class
    """Class for basic node functionality"""
    
    __slots__ = ['id', 'data', 'bpointer', 'fpointer']

    def __init__(self, id, data):
        self.id = id
        self.data = data
        self.bpointer = None
        self.fpointer = []
        
    def getId(self):
        return self.id

    def getData(self):
        return self.data

    def setData(self, data):
        self.data = data

    def setFPointer(self, id):
        self.fpointer.append(id)

    def getFPointer(self):
        return self.fpointer

    def setBPointer(self, id):
        self.bpointer = id

    def getBPointer(self):
        return self.bpointer

Tree.py:

# Copyright (c) 2010, Brett Alistair Kromkamp - brettkromkamp@gmail.com
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
# Redistributions of source code must retain the above copyright notice, this list
# of conditions and the following disclaimer.
# 
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# 
# Neither the name of the copyright holder nor the names of the contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Class that implements a tree structure

from __future__ import generators

import string
import Node

# module constants
_ADD = 0
_ROOT = 0

#--------------------------------------------------------------------------------

class BaseTree(object):  # python 2.2 new-style class
    """Class for basic tree/network functionality"""
    
    __slots__ = ['tree']
    
    def __init__(self):
        self.tree = []

    def appendNode(self, pos, data):
        """Append a node after the position indicated by the 'pos' parameter"""

        if pos is None:  # root node
            id = _ROOT
        else:                
            id = self.__generateId()
        node = Node.BaseNode(id, data)  # create node
        self.tree.append(node)
        self.__updateFPointer(pos, id, _ADD)
        node.setBPointer(pos)
        
    def getIndex(self, pos):  # debug: optimize
        if pos is None:
            return _ROOT
        index = 0
        if pos in self:
            for node in self.tree:
                if node.getId() == pos:
                    break
                index += 1
            return index
        else:
            return None

    def show(self, pos, pLevel=_ROOT):
        """Recursive depth-first function that prints-out the actual tree structure"""
        
        level = pLevel
        queue = self.tree[pos].getFPointer()
        if queue:  # branch
            if level == _ROOT:                
                print '>', string.rjust(`self.tree[pos].getId()`, 3) + 
                    ':', self.tree[pos].getData()
            else:
                print 't'*level, '>', string.rjust(`self.tree[pos].getId()`, 3) + 
                    ':', self.tree[pos].getData()
            level += 1
            for element in queue:
                index = self.getIndex(element)
                self.show(index, level)  # recursive call
        else:  # leaf
            if level == _ROOT:
                print '>', string.rjust(`self.tree[pos].getId()`, 3) + 
                    ':', self.tree[pos].getData()
            else:
                print 't'*level, '>', string.rjust(`self.tree[pos].getId()`, 3) + 
                    ':', self.tree[pos].getData()
            return
        
    def __updateFPointer(self, pos, id, mode):
        if pos is None:
            return
        else:
            self.tree[self.getIndex(pos)].setFPointer(id)

    def __updateBPointer(self, pos, id):
        self.tree[self.getIndex(pos)].setBPointer(id)

    def __generateId(self):
        """Private function that generates an unique node-ID"""
        
        nodes = [node.getId() for node in self.tree]
        result = max(nodes) +1
        return result

    def __getitem__(self, key):
        return self.tree[self.getIndex(key)]

    def __setitem__(self, key, item):
        self.tree[self.getIndex(key)] = item

    def __len__(self):
        return len(self.tree)

    def __nonzero__(self):
        if self.tree == []:
            return False  # python 2.2.1 constant
        else:
            return True  # python 2.2.1 constant

    def __contains__(self, pos):
        return [node.getId() for node in self.tree if node.getId() == pos]

Scanner.py:

# Copyright (c) 2010, Brett Alistair Kromkamp - brettkromkamp@gmail.com
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
# Redistributions of source code must retain the above copyright notice, this list
# of conditions and the following disclaimer.
# 
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# 
# Neither the name of the copyright holder nor the names of the contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# LISP-like grammar (postfix) scanner

import re
import Translator

# module constants
_KEYWORD = 0

#--------------------------------------------------------------------------------

class ScanError(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)

#--------------------------------------------------------------------------------

class ParseError(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)

#--------------------------------------------------------------------------------

class Scanner(object):
    """Class that implements a LISP-like expression scanner"""

    def __init__(self, expression):
        self.expression = expression
        self.levels = []
        self.keywords = ['prog', 'const', 'bind', 'func']

    def scan(self):
        def getFName(pTokens):  # inner function
            begin = pTokens.find(' ')
            end = pTokens.find('(')
            name = pTokens[begin +1:end]
            return name

        def getFParameters(pTokens):  # inner function
            begin = pTokens.find('(')
            end = pTokens.find(')')
            parameters = pTokens[begin +1:end]
            if parameters != '':
                parameters = parameters.split()
            else:
                parameters = []
            return parameters
            
        levels = [''] * self.maxLevel()
        index = 0
        counter = 0
        for char in self.expression:
            if char == '{':
                counter += 1
            levels[counter -1] = levels[counter -1] + char
            if char == '}':
                counter -= 1
        counter = 1

        for level in levels:            
            rawExpr = level.split('}')
            try:
                rawExpr.remove('')
            except ValueError:
                raise ScanError('Missing parenthesis')
            for expr in rawExpr:                
                tokens = expr.strip()
                tokens = tokens[1:]
                tokenStr = tokens
                tokens = tokens.split()
                if not self.isExpression(tokens):
                    try:
                        if not tokens[_KEYWORD] in self.keywords:
                            raise ScanError('Unknown keyword')
                    except ScanError, e:
                        print 'Illegal construct: %s' % e.value
                        raise
                    else:
                        if tokens[_KEYWORD] == 'func':
                            name = getFName(tokenStr)
                            parameters = getFParameters(tokenStr)
                            functionTokens = [name]
                            functionTokens.append(parameters)
                            self.levels.append([counter, functionTokens, tokens[_KEYWORD]])
                        else:
                            self.levels.append([counter, tokens[1:], tokens[_KEYWORD]])
                else:
                    expression = ' '.join(tokens)
                    self.levels.append([counter, expression, 'expr'])
            counter += 1                

    def maxLevel(self):
        level = 0
        max = 0
        for char in self.expression:
            if char == '{':
                level += 1
                if level > max:
                    max = level
            if char == '}':
                level -= 1
        return max

    def isExpression(self, pExpression):
        expression = ' '.join(pExpression)
        expression = expression.strip()
        if expression[0] =='(' and expression[-1] == ')':
            result = True
        else:
            result = False
        return result                       

#--------------------------------------------------------------------------------

class Parser(object):
    """Class that implements a LISP-like grammar scanner"""

##    symtable = {} # symbol table (class-attribute)

    def __init__(self, struct):
        self.struct = struct
        self.doProg(self.struct[0])        
##        self.doConst()
##        self.doBind()
##        self.doFunc()
        for level in self.struct:
            if (level[0] == 2) and (level[2] == 'expr'):
                print self.doExpression(level[1])

    def doProg(self, prog):
        try:
            if not self.isIdentifier(prog[2][0]):
                raise ParseError('Bad identfier in PROG-statement')
        except ParseError, e:
            print 'Illegal language construct: %s' % e.value
            raise

    def doConst(self):
        pass

##    def doBind(self):
##        for level in self.struct:
##            if (level[0] == 2) and (level[2] == 'bind'):
##                variable = (level[1][0], level[1][1])
##                self.__class__.symtable[variable[0]] = variable[1]

    def doFunc(self):
        pass

    def doExpression(self, expression):
        translator = Translator.Translator(expression)
        translator.scan()
        translator.buildTree()
        translator.evaluate()
        return translator.__class__.stack[0][0]

    def isIdentifier(self, identifier):
        p = re.compile(r'^[a-z]w*', re.IGNORECASE)
        m = p.match(identifier)
        if m != None:
            result = True
        else:
            result = False
        return result

Translator.py:

# Copyright (c) 2010, Brett Alistair Kromkamp - brettkromkamp@gmail.com
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
# Redistributions of source code must retain the above copyright notice, this list
# of conditions and the following disclaimer.
# 
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# 
# Neither the name of the copyright holder nor the names of the contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# LISP-like expression (postfix) translator with scanner/parser and evaluator

import Tree

# module constants
_ROOT = 0
_LEVEL = 0
_OPERATOR = 1
_OPERANDS = 2

#--------------------------------------------------------------------------------

class ScanError(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)

#--------------------------------------------------------------------------------

class EvaluationError(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)

#--------------------------------------------------------------------------------

class Translator(object):
    """Class that implements a LISP-like expression scanner/parser and evaluator"""

    __slots__ = ['expression', 'levels', 'tree']

    stack = []  # stack for intermediate calculations (class-attribute)

    def __init__(self, expression):
        self.expression = expression
        self.levels = []
        self.tree = Tree.BaseTree()
        
    def scan(self):
        levels = [''] * self.maxLevel()
        self.__class__.stack = [None] * self.maxLevel()
        index = 0
        counter = 0
        for char in self.expression:
            if char == '(':
                counter += 1
            levels[counter -1] = levels[counter -1] + char
            if char == ')':
                counter -= 1
        counter = 1        
        for level in levels:            
            rawExpr = level.split(')')
            try:
                rawExpr.remove('')
            except ValueError:
                raise ScanError('Missing parenthesis')
            for expr in rawExpr:
                tokens = expr.strip()
                tokens = tokens[1:]
                tokens = tokens.split()
                try:
                    if not tokens[0] in ['*', '/', '+', '-']:
                        raise ScanError('Operator not found or missing parenthesis')
                except ScanError, e:
                    print 'Illegal expression: %s' % e.value
                    raise
                else:                    
                    tokenStr = ' '.join(tokens)
                    tokenStr = tokenStr[1:]
                    tokenStr.strip()
                    self.levels.append([counter, tokens[0], tokenStr[1:]])
            counter += 1
    
    def buildTree(self):
        if self.levels[0][_OPERANDS] != '':
            self.tree.appendNode(None, [self.levels[0][_OPERATOR], 
                self.levels[0][_OPERANDS]])
        else:
            self.tree.appendNode(None, [self.levels[0][_OPERATOR]])
        for level in self.levels:
            if level[_LEVEL] != 1:
                if level[_OPERANDS] != '':
                    self.tree.appendNode(level[_LEVEL] -2, 
                        [level[_OPERATOR], level[_OPERANDS]])
                else:
                    self.tree.appendNode(level[_LEVEL] -2, 
                        [level[_OPERATOR]])
                
    def evaluate(self, pos=_ROOT, pLevel=_ROOT):
        level = pLevel                
        queue = self.tree[pos].getFPointer()
        if queue:  # branch
            level += 1
            for element in queue:
                index = self.tree.getIndex(element)
                result = self.evaluate(index, level)  # recursive call
            if len(self.tree[pos].data) == 2:
                operands = self.tree[pos].data[1] + ' ' + self.getStackStr(level)
                # completed (or highest level) expression
                expr3 = self.inFix(self.tree[pos].data[0], operands)
                try:
                    value3 = eval(expr3)                    
                except (SyntaxError, NameError):
                    raise EvaluationError('Error in high level expression')
                self.setStack(level -1, value3)
            else:
                # intermediate (or multi-level) expression
                expr2 = self.inFix(self.tree[pos].data[0], self.getStackStr(level))
                try:
                    value2 = eval(expr2)                    
                except (SyntaxError, NameError):
                    raise EvaluationError('Error in intermediate level expression')
                self.setStack(level -1, value2)
        else:  # leaf
            # final (or lowest) level expression
            expr1 = self.inFix(self.tree[pos].data[0], self.tree[pos].data[1])
            try:
                value = eval(expr1)                
            except (SyntaxError, NameError):
                raise EvaluationError('Error in low level expression')
            self.setStack(level, value)
            return value

    def maxLevel(self):
        level = 0
        max = 0
        for char in self.expression:
            if char == '(':
                level += 1
                if level > max:
                    max = level
            if char == ')':
                level -= 1
        return max

    def setStack(self, level, value):
        if self.__class__.stack[level] is None:
            self.__class__.stack[level] = [value]
        else:
            self.__class__.stack[level].append(value)

    def getStack(self, level):
        return self.__class__.stack[level]

    def getStackStr(self, level):
        expr = ''
        for each in self.__class__.stack[level]:
            expr = expr + ' ' + repr(each)
        expr = expr.strip()
        return expr

    def inFix(self, operator, pOperand):
        if pOperand == '':
            return operator
        operands = pOperand.split()
        expr = ''
        for index in range(len(operands)):
            if index != (len(operands) -1) or len(operands) == 1:
                expr = expr + ' ' + operands[index] + ' ' + operator
            else:
                expr = expr + ' ' + operands[index]
        expr = expr.strip()
        return expr

Compiler.py:

# Copyright (c) 2010, Brett Alistair Kromkamp - brettkromkamp@gmail.com
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
# Redistributions of source code must retain the above copyright notice, this list
# of conditions and the following disclaimer.
# 
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# 
# Neither the name of the copyright holder nor the names of the contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# LISP-like (postfix) expression compiler

import Tree

# module constants
_ROOT = 0
_LEVEL = 0
_OPERATOR = 1
_OPERANDS = 2
_TOPLEVEL = 1
# mnemonics
(HLT, MUL, DIV, ADD, SUB, PUSH, POP, EXCH, LT, LE, 
    EQ, NE, GT, GE, NEG, NOT, AND, OR, PRN) = range(19)

#--------------------------------------------------------------------------------

class ScanError(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)

#--------------------------------------------------------------------------------

class Compiler(object):
    """Class that implements a LISP-like (postfix) expression compiler"""

    __slots__ = ['expression', 'levels', 'tree', 'byteCode', 'operators']
    
    stackCount = []  # class attribute
    operandCount = []
    operatorCount = []

    def __init__(self, expression):
        self.expression = expression
        self.levels = []
        self.tree = Tree.BaseTree()
        self.byteCode = []
        self.operators = ['*', '/', '+', '-', '<', '<=', 
            '==', '!=', '>=', '>', 'not', 'and', 'or']
        
    def scan(self):
        levels = [''] * self.maxLevel()
        self.__class__.stackCount = [0] * self.maxLevel()
        self.__class__.operandCount = [0] * self.maxLevel()
        self.__class__.operatorCount = [0] * self.maxLevel()
        index = 0
        counter = 0
        for char in self.expression:
            if char == '(':
                counter += 1
            levels[counter -1] = levels[counter -1] + char
            if char == ')':
                counter -= 1
        counter = 1        
        for level in levels:            
            rawExpr = level.split(')')
            try:
                rawExpr.remove('')
            except ValueError:
                raise ScanError('Missing parenthesis')
            for expr in rawExpr:                
                tokens = expr.strip()                
                tokens = tokens[1:]
                tokens = tokens.split()
                try:
                    if not tokens[0] in self.operators:
                        raise ScanError('Operator not found or missing parenthesis')
                except ScanError, e:
                    print 'Illegal expression: %s' % e.value
                    raise
                else:
                    operands = tokens[1:]
                    operandStr = ' '.join(operands)
                    operandStr.strip()
                    self.levels.append([counter, tokens[0], operandStr])
            counter += 1
            
    def buildTree(self):
        if self.levels[0][_OPERANDS] != '':
            self.tree.appendNode(None, [self.levels[0][_OPERATOR], 
                self.levels[0][_OPERANDS]])
        else:
            self.tree.appendNode(None, [self.levels[0][_OPERATOR]])
        for level in self.levels:
            if level[_LEVEL] != 1:
                if level[_OPERANDS] != '':
                    self.tree.appendNode(level[_LEVEL] -2, [level[_OPERATOR], 
                        level[_OPERANDS]])                    
                else:
                    self.tree.appendNode(level[_LEVEL] -2, [level[_OPERATOR]])
                
    def generate(self, pos=_ROOT, pLevel=_ROOT, count=0):
        count += 1
        level = pLevel                
        queue = self.tree[pos].getFPointer()
        queue.reverse()
        if queue:  # branch
            level += 1
            for element in queue:
                index = self.tree.getIndex(element)
                self.generate(index, level, count)  # recursive call
            if len(self.tree[pos].data) == 2:
                self.output(count, self.tree[pos].data[0], self.tree[pos].data[1])
                self.countStack(level -1)
            else:
                self.output(count, self.tree[pos].data[0])
                self.countStack(level -1)
        else:  # leaf
            self.output(count, self.tree[pos].data[0], self.tree[pos].data[1])
            self.countStack(level)

    def maxLevel(self):
        level = 0
        max = 0
        for char in self.expression:
            if char == '(':
                level += 1
                if level > max:
                    max = level
            if char == ')':
                level -= 1
        return max

    def countStack(self, level):
        count = self.__class__.stackCount[level]
        count += 1
        self.__class__.stackCount[level] = count

    def output(self, level, operator, operand=''):
        operator = operator.lower()
        operandCount = 0
        operatorCount = 0
        operands = operand.split()
        operands.reverse()
        if operator == '*':
            mnemonic = 'MUL'
        elif operator == '/':
            mnemonic = 'DIV'
        elif operator == '+':
            mnemonic = 'ADD'
        elif operator == '-':
            mnemonic = 'SUB'
        elif operator == '<':
            mnemonic = 'LT'
        elif operator == '<=':
            mnemonic = 'LE'
        elif operator == '==':
            mnemonic = 'EQ'
        elif operator == '!=':
            mnemonic = 'NE'
        elif operator == '>':
            mnemonic = 'GT'
        elif operator == '>=':
            mnemonic = 'GE'
        elif operator == 'not':
            mnemonic = 'NOT'
        elif operator == 'and':
            mnemonic = 'AND'
        elif operator == 'or':
            mnemonic = 'OR'

        if len(operands) >= 1:
            for index in range(len(operands)):
                self.byteCode.append('PUSH')
                self.byteCode.append(int(operands[index]))
                operandCount += 1
            for index in range(len(operands)):
                if index != (len(operands) -1) or len(operands) == 1:
                    self.byteCode.append(mnemonic)
                    operatorCount += 1
        else:
            self.byteCode.append(mnemonic)
            operatorCount += 1

        self.__class__.operandCount[level -1] = operandCount
        self.__class__.operatorCount[level -1] = operatorCount
        if level < len(self.__class__.stackCount) and 
            self.__class__.stackCount[level] != 0:
            padding = ((self.__class__.operandCount[level -1] + 
                self.__class__.stackCount[level]) 
                    - self.__class__.operatorCount[level -1]) -1
            if padding > 0:
                for index in range(padding):
                    self.byteCode.append(mnemonic)

VirtualMachine.py:

# Copyright (c) 2010, Brett Alistair Kromkamp - brettkromkamp@gmail.com
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
# Redistributions of source code must retain the above copyright notice, this list
# of conditions and the following disclaimer.
# 
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# 
# Neither the name of the copyright holder nor the names of the contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Class that implements a virtual (stack-based) machine: CPU, memory, and I/O

import Compiler

# module constants
_KBYTE = 1024
_MEM = 1
#_TOTAL_MEM = _MEM * _KBYTE
_TOTAL_MEM = 256  # (bytes)
_FALSE = 0
_TRUE = 1
# machine status
(RUNNING, FINISHED, BADMEM, BADDATA, NODATA, DIVZERO, BADOP) = range(7)
# mnemonics
(HLT, MUL, DIV, ADD, SUB, PUSH, POP, EXCH, LT, LE, 
    EQ, NE, GT, GE, NEG, NOT, AND, OR, PRN) = range(19)

#--------------------------------------------------------------------------------

class CPU(object):
    """Class that implements a CPU"""

    def __init__(self, memSize):
        self.pc = 0  # program counter
        self.ir = 0  # instruction register
        self.sp = memSize -1  # stack pointer

    def dumpRegisters(self):
        print 'Current state of CPU'
        print
        print 'Registers'
        print 'PC:', repr(self.pc).rjust(3)
        print 'IR:', repr(self.ir).rjust(3)
        print 'SP:', repr(self.sp).rjust(3)

#--------------------------------------------------------------------------------

class VirtualMachine(object):
    """Class that implements a virtual (stack-based) machine: CPU, memory, and I/O"""

    memory = [0] * _TOTAL_MEM  # class attribute

    def __init__(self):        
        self.cpu = CPU(len(self.__class__.memory))
        self.status = FINISHED        

    def dumpMemory(self):
        index = 0
        print 'Memory dump: %s bytes' % _TOTAL_MEM
        print
        for byte in range(16):
            print repr(byte).rjust(6),
        print
        print '-'*((16*7)+1)
        for byte in range(len(vm.__class__.memory)):
            if (byte % 16 == 0 and byte != 0):
                print '|' + repr(index).rjust(6)
                index += 1                
            print repr(vm.__class__.memory[byte]).rjust(6),
        print '|' + repr(index).rjust(6)

    def load(self, code):
        for index in range(len(code)):
            self.memory[index] = code[index]

    def execute(self, code, breakpoint=0):
        if breakpoint == 0:
            breakpoint = len(code)
        for index in range(len(code)):
            self.fetchExecute()
            if index == (breakpoint -1):
                break

    def fetchExecute(self):
        self.cpu.ir = self.__class__.memory[self.cpu.pc]  # fetch        
        self.cpu.pc += 1

        if self.cpu.ir == 'HLT':  # execute (from here on)
            self.status = FINISHED
        elif self.cpu.ir == 'MUL':
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS * SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'DIV':
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            if SOS == 0:
                self.status = DIVZERO
                self.cpu.sp -= 1  # restore stack
                self.cpu.sp -= 1
            else:
                self.__class__.memory[self.cpu.sp] = (TOS / SOS)
                self.cpu.sp -= 1
        elif self.cpu.ir == 'ADD':
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS + SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'SUB':
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS - SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'PUSH':            
            self.__class__.memory[self.cpu.sp] = self.__class__.memory[self.cpu.pc]
            self.cpu.pc += 1
            self.cpu.sp -= 1
        elif self.cpu.ir == 'POP':
            self.cpu.sp += 1
        elif self.cpu.ir == 'EXCH':
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = TOS
            self.cpu.sp -= 1
            self.__class__.memory[self.cpu.sp] = SOS
            self.cpu.sp -= 1
        elif self.cpu.ir == 'LT':  # less than
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS < SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'LE':  # less than or equal
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS <= SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'EQ':  # equal
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS == SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'NE':  # not equal
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS != SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'GT':  # greater than
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS > SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'GE':  # greater than or equal
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS >= SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'NEG':  # negate
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (0 - TOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'NOT':  # logical not
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (not TOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'AND':  # logical and
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS and SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'OR':  # logical or
            self.cpu.sp += 1
            TOS = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            SOS = self.__class__.memory[self.cpu.sp]
            self.__class__.memory[self.cpu.sp] = (TOS or SOS)
            self.cpu.sp -= 1
        elif self.cpu.ir == 'PRN':
            result = self.__class__.memory[self.cpu.sp]
            self.cpu.sp += 1
            print result

#--------------------------------------------------------------------------------

if __name__ == '__main__':
    vm = VirtualMachine()
    expression = '(+ 130 (- 100 (/ (* 2 25) (- 10 8)) -25 7 (+ 10 10) (/ 30 10)))'
    compiler = Compiler.Compiler(expression)
    print expression
    print '-'*80
    compiler.scan()
    compiler.buildTree()
    compiler.generate()
    vm.load(compiler.byteCode)    
    vm.execute(compiler.byteCode)
    vm.cpu.dumpRegisters()
    print
    print 'Status: ', vm.status
    print
    vm.dumpMemory()






Google