本文简单介绍了一种四则运算求值的实现方法(基于语法分析)
双栈算法可以实现四则运算的求值,但是扩展性比较低,更好的方式是基于语法分析来实现,整体大概包括以下几个步骤:
我们先来看词法分析:
首先定义我们需要用到的词法类型:
local token_type =
{
add = 1, -- '+'
minus = 2, -- '-'
mul = 3, -- '*'
div = 4, -- '/'
lbrace = 5, -- '('
rbrace = 6, -- ')'
num = 7, -- '33.6' etc.
}
return token_type
基本就是 加减乘除等 6 个符号类型,外加 1 个数字类型,其中数字类型有些特殊,我们除了需要知道他的类型(即数字类型)以外,还需要知道他的实际数值,所以我们需要对数字类型的数值进行额外的记录(后面会有说明).
接着我们来定义一些用于进行词法分析的辅助函数(和结构):
local token_map =
{
['+'] = token_type.add,
['-'] = token_type.minus,
['*'] = token_type.mul,
['/'] = token_type.div,
['('] = token_type.lbrace,
[')'] = token_type.rbrace,
}
local function is_space(c)
return c == ' ' or
c == '\t'
end
local function is_letter(c)
return (c >= 'a' and c <= 'z') or
(c >= 'A' and c <= 'Z')
end
local function is_number(c)
return c >= '0' and c <= '9'
end
local function is_digit(c)
return c == '.'
end
local function skip_space(raw_exp, from_index)
if raw_exp then
while is_space(raw_exp:sub(from_index, from_index)) do
from_index = from_index + 1
end
end
return from_index
end
local function read_num(raw_exp, from_index)
if raw_exp then
local start_index = from_index
local cur_char = raw_exp:sub(from_index, from_index)
while is_number(cur_char) or is_digit(cur_char) do
from_index = from_index + 1
cur_char = raw_exp:sub(from_index, from_index)
end
return from_index, raw_exp:sub(start_index, from_index - 1)
end
end
接着我们就可以进行实际的词法分析了,基本思路便是依次检查各个字符(中间会忽略空白字符),如果在 token_map 中有直接的 token 映射则直接解析成功,否则就尝试 read_num,代码中的 result_values 则是用于记录数字类型的实际数值,相关代码如下:
local function parse_token(raw_exp)
local result_tokens = {}
local result_values = {}
if raw_exp then
local parse_index = 1
while parse_index <= #raw_exp do
parse_index = skip_space(raw_exp, parse_index)
local cur_char = raw_exp:sub(parse_index, parse_index)
-- handle normal token
if token_map[cur_char] then
table.insert(result_tokens, token_map[cur_char])
parse_index = parse_index + 1
else
local index, num = read_num(raw_exp, parse_index)
if num and #num > 0 then
if tonumber(num) then
table.insert(result_tokens, token_type.num)
-- store num values
result_values[#result_tokens] = tonumber(num)
parse_index = index
else
print("error token number : " .. num)
parse_index = index
end
else
print("error to parse token : " .. raw_exp .. " at " .. parse_index)
break
end
end
end
end
return result_tokens, result_values
end
最后我们对词法分析的结果再做一次封装,用以更方便的使用:
local lexer = {}
lexer.__index = lexer
function lexer:init(tokens, values)
self.tokens = tokens or {}
self.values = values or {}
self.index = 1
end
function lexer:match(token)
if self.tokens[self.index] == token then
return true
else
return false
end
end
function lexer:peek()
return self.tokens[self.index], self.values[self.index]
end
function lexer:next()
local token, value = self.tokens[self.index], self.values[self.index]
self.index = self.index + 1
return token, value
end
function lexer.create(raw_exp)
local new_lexer = setmetatable({}, lexer)
new_lexer:init(parse_token(raw_exp))
return new_lexer
end
return lexer
OK, 词法分析结束,我们接着来做语法分析,其中的核心就是我们要明确四则运算表达式的 BNF 范式:
expression: term { ("+" | "-") term }
term: factor { ("*" | "/") factor }
factor: NUMBER | "(" expression ")" | - factor
上面就是经典的四则运算 BNF 范式,有了这个范式,我们便可以据此直接(按递归下降方式)写出语法分析的代码:
local parser = {}
function parser.parse_factor(lexer)
if lexer:match(token_type.num) then
lexer:next()
elseif lexer:match(token_type.lbrace) then
lexer:next()
parser.parse_expression(lexer)
lexer:next()
elseif lexer:match(token_type.minus) then
lexer:next()
parser.parse_factor(lexer)
else
print("error to parse factor : " .. tostring(lexer:peek()))
end
end
function parser.parse_term(lexer)
parser.parse_factor(lexer)
while true do
if lexer:match(token_type.mul) then
lexer:next()
parser.parse_factor(lexer)
elseif lexer:match(token_type.div) then
lexer:next()
parser.parse_factor(lexer)
else
break
end
end
end
function parser.parse_expression(lexer)
parser.parse_term(lexer)
while true do
if lexer:match(token_type.add) then
lexer:next()
parser.parse_term(lexer)
elseif lexer:match(token_type.minus) then
lexer:next()
parser.parse_term(lexer)
else
break
end
end
end
function parser.parse(raw_exp)
local lexer = lexer.create(raw_exp)
parser.parse_expression(lexer)
end
return parser
看到这里可能会产生疑问:我们的目的是实现四则运算的求值,上面的语法分析似乎只是解析了一遍语法(如果语法正确的话其实没有任何输出),怎么来进行实际的求值呢?
其实这个问题就引出了我们要介绍的第三个话题:语法树生成.其实在上面的语法分析过程中,我们不仅需要进行语法解析,还需要同时生成一颗对应的抽象语法树,而之后的四则运算求值就可以直接在这颗生成的抽象语法树上进行.
我们首先来定义语法树的各类节点:
local num_syntax_node = {}
num_syntax_node.__index = num_syntax_node
function num_syntax_node:init(num)
self.num = num
return self
end
function num_syntax_node.create(num)
local node = setmetatable({}, num_syntax_node)
return node:init(num)
end
return num_syntax_node
上面的就是最简单的数字节点,其余的节点(加减乘除)如下:
local add_syntax_node = {}
add_syntax_node.__index = add_syntax_node
function add_syntax_node:init(left_node, right_node)
self.left_node = left_node
self.right_node = right_node
return self
end
function add_syntax_node.create(left_node, right_node)
local node = setmetatable({}, add_syntax_node)
return node:init(left_node, right_node)
end
return add_syntax_node
local minus_syntax_node = {}
minus_syntax_node.__index = minus_syntax_node
function minus_syntax_node:init(left_node, right_node)
self.left_node = left_node
self.right_node = right_node
return self
end
function minus_syntax_node.create(left_node, right_node)
local node = setmetatable({}, minus_syntax_node)
return node:init(left_node, right_node)
end
return minus_syntax_node
local mul_syntax_node = {}
mul_syntax_node.__index = mul_syntax_node
function mul_syntax_node:init(left_node, right_node)
self.left_node = left_node
self.right_node = right_node
return self
end
function mul_syntax_node.create(left_node, right_node)
local node = setmetatable({}, mul_syntax_node)
return node:init(left_node, right_node)
end
return mul_syntax_node
local div_syntax_node = {}
div_syntax_node.__index = div_syntax_node
function div_syntax_node:init(left_node, right_node)
self.left_node = left_node
self.right_node = right_node
return self
end
function div_syntax_node.create(left_node, right_node)
local node = setmetatable({}, div_syntax_node)
return node:init(left_node, right_node)
end
return div_syntax_node
接着我们需要在上面的语法分析过程中生成对应的语法树节点:
local parser = {}
function parser.parse_factor(lexer)
if lexer:match(token_type.num) then
local _, num = lexer:next()
return num_syntax_node.create(num)
elseif lexer:match(token_type.lbrace) then
lexer:next()
local node = parser.parse_expression(lexer)
lexer:next()
return node
elseif lexer:match(token_type.minus) then
-- convert to "0 - factor"
lexer:next()
local node = parser.parse_factor(lexer)
return minus_syntax_node.create(num_syntax_node.create(0), node)
else
print("error to parse factor : " .. tostring(lexer:peek()))
end
end
function parser.parse_term(lexer)
local node = parser.parse_factor(lexer)
while true do
if lexer:match(token_type.mul) then
lexer:next()
node = mul_syntax_node.create(node, parser.parse_factor(lexer))
elseif lexer:match(token_type.div) then
lexer:next()
node = div_syntax_node.create(node, parser.parse_factor(lexer))
else
break
end
end
return node
end
function parser.parse_expression(lexer)
local node = parser.parse_term(lexer)
while true do
if lexer:match(token_type.add) then
lexer:next()
node = add_syntax_node.create(node, parser.parse_term(lexer))
elseif lexer:match(token_type.minus) then
lexer:next()
node = minus_syntax_node.create(node, parser.parse_term(lexer))
else
break
end
end
return node
end
function parser.parse(raw_exp)
local lexer = lexer.create(raw_exp)
local syntax_tree = parser.parse_expression(lexer)
-- TODO use syntax_tree to evaluate
end
最后一个问题就是如何通过语法树进行求值了,过程其实很简单,直接定义各个节点的求值函数(为各个节点统一添加 evaluate 方法),然后递归求解即可:
local num_syntax_node = {}
num_syntax_node.__index = num_syntax_node
function num_syntax_node:init(num)
self.num = num
return self
end
function num_syntax_node:evaluate()
return self.num
end
function num_syntax_node.create(num)
local node = setmetatable({}, num_syntax_node)
return node:init(num)
end
return num_syntax_node
local add_syntax_node = {}
add_syntax_node.__index = add_syntax_node
function add_syntax_node:init(left_node, right_node)
self.left_node = left_node
self.right_node = right_node
return self
end
function add_syntax_node:evaluate()
local left_val = self.left_node:evaluate()
local right_val = self.right_node:evaluate()
return left_val + right_val
end
function add_syntax_node.create(left_node, right_node)
local node = setmetatable({}, add_syntax_node)
return node:init(left_node, right_node)
end
return add_syntax_node
local minus_syntax_node = {}
minus_syntax_node.__index = minus_syntax_node
function minus_syntax_node:init(left_node, right_node)
self.left_node = left_node
self.right_node = right_node
return self
end
function minus_syntax_node:evaluate()
local left_val = self.left_node:evaluate()
local right_val = self.right_node:evaluate()
return left_val - right_val
end
function minus_syntax_node.create(left_node, right_node)
local node = setmetatable({}, minus_syntax_node)
return node:init(left_node, right_node)
end
return minus_syntax_node
local mul_syntax_node = {}
mul_syntax_node.__index = mul_syntax_node
function mul_syntax_node:init(left_node, right_node)
self.left_node = left_node
self.right_node = right_node
return self
end
function mul_syntax_node:evaluate()
local left_val = self.left_node:evaluate()
local right_val = self.right_node:evaluate()
return left_val * right_val
end
function mul_syntax_node.create(left_node, right_node)
local node = setmetatable({}, mul_syntax_node)
return node:init(left_node, right_node)
end
return mul_syntax_node
local div_syntax_node = {}
div_syntax_node.__index = div_syntax_node
function div_syntax_node:init(left_node, right_node)
self.left_node = left_node
self.right_node = right_node
return self
end
function div_syntax_node:evaluate()
local left_val = self.left_node:evaluate()
local right_val = self.right_node:evaluate()
return left_val / right_val
end
function div_syntax_node.create(left_node, right_node)
local node = setmetatable({}, div_syntax_node)
return node:init(left_node, right_node)
end
return div_syntax_node
上述的语法分析代码修改如下:
function parser.parse(raw_exp)
local lexer = lexer.create(raw_exp)
local syntax_tree = parser.parse_expression(lexer)
return syntax_tree:evaluate()
end
至此我们便可以顺利的对四则运算进行求值了:
local exp = "(3 + 4 * (5 + 6) - 7) * 0 + -3 * (5 + 6)"
print(parser.parse(exp))