pythonic.lua

----------------------------------------------------------------------
----------------------------------------------------------------------
--
-- This sample shows how to build and use a different lexer in
-- Metalua. A lexer is essentially an object with a [newstream]
-- method, which takes a source code as a parameter and returns a
-- lexstream as a result; a lexstream is an object with methods
-- [peek], [next], [add] and [is_keyword].
--
-- Although you can rewrite such an object from scratch, it's often
-- best to extend the original lexer provided with Metalua; this is
-- what's done in this sample.
--
-- Here, we introduce an alternative syntax which structures code
-- blocks according to indentation, in a way similar to what Python
-- does. Since we don't want to modify the parser in depth, here is
-- how it works:
--
-- * a colon at the end of a line, followed by an indentation,
--   begins a block;
--
-- * when a line begins with an indentation less than the indentation
--   of some unterminated blocks, all of these blocks are closed.
--   Closing them is done by injecting as many `Keyword{ "end" }
--   tokens in the stream as necessary;
--
-- * under such conditions, the Lua keywords which introduce new
--   blocks become quite annoying, as they're redundant with the
--   colon. Therefore, we suppress "then", as well as "do" in loops;
--
-- * it's now possible to mix indentation-induced blocks, which
--   generate implicit "end"s, with normal blocks. For instance,
--   these two statements are equivalent:
--
--      for i=1,10 print(i) end
--
--      for i=1,10:
--         print(i)
--
-- Also notice that the former version, with explicit "end"s, could
-- span several lines, as long as each of them is indented enough
-- not to close any surrounding colon-induced block.
--
-- For a usage sample of this, look at pysample.lua.
----------------------------------------------------------------------
----------------------------------------------------------------------

----------------------------------------------------------------------
-- I assume that the regular lexer is loaded; this is always the
-- case at metalevel zero.
----------------------------------------------------------------------
assert (mlp.lexer)
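----------------------------------------------------------------------
-- To make the rules above concrete, here is an illustrative input
-- (hypothetical, in the spirit of pysample.lua) and what the lexer
-- does to it:
--
--   while x > 1:
--      x = x - 1
--   print (x)
--
-- When the dedented "print" line is reached, the lexer injects one
-- `Keyword{ "end" } token, so the loop parses exactly as if the
-- "end" had been written explicitly.
----------------------------------------------------------------------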
----------------------------------------------------------------------
-- Brutal inheritance by full cloning: there's no need to keep shared
-- behavior between the original lexer and this one, as I don't plan
-- to modify the former.
--
-- Notice that [table.deep_copy()] takes care of also copying
-- the metatable, therefore providing a real object-cloning
-- facility.
----------------------------------------------------------------------
local super = mlp.lexer
pylex = table.deep_copy (super)
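----------------------------------------------------------------------
-- For readers who don't know [table.deep_copy]: the cloning idea
-- boils down to something like the sketch below (a naive version
-- with a made-up name; the real function is more careful, e.g.
-- about shared subtables, while this one assumes acyclic input).
----------------------------------------------------------------------
local function naive_deep_copy (t)
   local copy = {}
   for k, v in pairs (t) do
      copy[k] = type (v) == "table" and naive_deep_copy (v) or v
   end
   -- Copying the metatable is what makes this a real cloning facility:
   return setmetatable (copy, getmetatable (t))
end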
pylex.stream_mt = { __index=pylex }

----------------------------------------------------------------------
-- We need to be less forgetful, now that whitespaces are significant;
-- therefore we completely stop considering tabs as whitespaces:
----------------------------------------------------------------------
pylex.patterns.spaces = "^[ \r\n]*()"
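-- For instance (illustrative checks; the empty capture "()" returns
-- the position where the whitespace run stops):
assert ((" \n x"):match (pylex.patterns.spaces) == 4) -- spaces & newlines skipped
assert (("\tx"):match (pylex.patterns.spaces) == 1)   -- tabs are not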
----------------------------------------------------------------------
-- These are the remaining mandatory block end markers: if one of
-- them begins a line, we must not generate an implicit "end" for the
-- block it terminates.
----------------------------------------------------------------------
pylex.explicit_block_terminators = {
   ["elseif"]=1, ["else"]=1, ["until"]=1 }

----------------------------------------------------------------------
-- Handling whitespaces: this calls the normal whitespace handler,
-- then checks whether it's at the beginning of a line. If so, and if
-- the indentation is less than some block indentation levels, add as
-- many "end" keywords as required to close all blocks that must be
-- closed. These "end" keywords are added by incrementing
-- [self.pending_ends], which will actually be converted back into
-- real keywords by the *following* calls to the whitespace handler.
----------------------------------------------------------------------
function pylex:skip_whitespaces_and_comments()
   --------------------------------------------------------
   -- If there are some "end"s to generate, do it before
   -- munching more data:
   --------------------------------------------------------
   if self.pending_ends > 0 then
      -- There were some pending "end" keywords to generate.
      printf ("%i pending ends to generate", self.pending_ends)
      self.pending_ends = self.pending_ends - 1
      return "Keyword", "end"
   end

   local previous_i = self.i
   local tag, content = super.skip_whitespaces_and_comments (self)
   --------------------------------------------------------
   -- When Eof happens, we need to close all open blocks
   -- before actually returning Eof.
   --------------------------------------------------------
   if tag == "Eof" then
      local unclosed = #self.indent_levels - 1
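      -- (The "- 1" accounts for the toplevel indentation 0, which
      -- always stays at the bottom of the stack and never gets an
      -- implicit "end".)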
printf ("There are %i ends to close", unclosed);
if unclosed>0then
self.indent_levels = {0}
self.pending_ends = unclosed-1return"Keyword", "end"elsereturn tag, content
endend
   assert (not tag, "Original lexer returned a non-Eof whitespace value!?")
   --------------------------------------------------------
   -- Check if this is a line's first token, and if so,
   -- check the indentation.
   --------------------------------------------------------
   local j, k = self.i
   repeat j=j-1; k = self.src:sub(j,j) until j==previous_i or k~=" "
   if k=="\r" or k=="\n" then
      -- This is indeed the first token of a line.

      --------------------------------------------------------
      -- There are some keywords which close a block
      -- explicitly. If one of them is going to be read,
      -- cancel one implicit "end".
      --------------------------------------------------------
      local next_word = self.src:match ("^([%a_][%w_]+)", self.i)
      if next_word and self.explicit_block_terminators[next_word] then
         self.pending_ends = self.pending_ends - 1
      end

      --------------------------------------------------------
      -- Generate enough "end"s to match the new indent level.
      --------------------------------------------------------
      local indent_level = self.i - j - 1
      while true do
         local block_level = self.indent_levels[1]
         if block_level > indent_level then
            self.pending_ends = self.pending_ends + 1
            table.remove (self.indent_levels, 1)
         else break end
      end

      --------------------------------------------------------
      -- We have to do it again, as some "end"s might
      -- have been added just above:
      --------------------------------------------------------
      if self.pending_ends > 0 then
         printf ("%i pending ends to generate [trail]", self.pending_ends)
         self.pending_ends = self.pending_ends - 1
         return "Keyword", "end"
      end
   end
end
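----------------------------------------------------------------------
-- An illustrative, standalone restatement of the dedent bookkeeping
-- above (a hypothetical helper, not used by the lexer): pop every
-- indentation level deeper than the current line's, counting one
-- implicit "end" per popped level.
----------------------------------------------------------------------
local function sketch_count_ends (indent_levels, line_indent)
   -- [indent_levels] has the innermost level first and always ends
   -- with the toplevel 0, like [self.indent_levels] in the method above.
   local pending = 0
   while indent_levels[1] > line_indent do
      pending = pending + 1
      table.remove (indent_levels, 1)
   end
   return pending
end
assert (sketch_count_ends ({6, 3, 0}, 0) == 2) -- dedenting to column 0 closes 2 blocks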
----------------------------------------------------------------------
-- Find the colons followed by a line break.
----------------------------------------------------------------------
function pylex:extract_block_begin()
   local x, y = self.src:match ("^: *[\r\n]+() +()", self.i)
   if not x then
      x, y = self.src:match ("^: %-%-[^\n]*[\r\n]+() +()", self.i)
   end
   if x then
      local ilevel = y - x
      if ilevel <= self.indent_levels[1] then
         -- error (string.format ("Messed up indentation: %i->%i",
         --                       self.indent_levels[1], ilevel))
      end
      table.insert (self.indent_levels, 1, ilevel)
      self.i = y
      -- There might be comments to skip after the indentation:
      super.skip_whitespaces_and_comments (self)
      return
   end
end

----------------------------------------------------------------------
-- This is some internal hacking of the lexer: when [mlp_lexer] tries
-- to extract a lexeme, it tries all the methods of this list in
-- order, until one of them actually returns a (tag, content) pair.
-- Therefore, we introduce [extract_block_begin] before
-- [extract_symbol] (which would wrongly accept a line-terminating ":").
----------------------------------------------------------------------
table.insert (pylex.extractors, 2, "extract_block_begin")
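-- For instance, the first pattern in [extract_block_begin] captures
-- the two positions around the indentation (an illustrative check,
-- with made-up variable names):
local demo_x, demo_y = (":\n   foo"):match ("^: *[\r\n]+() +()")
assert (demo_y - demo_x == 3) -- a block indented by 3 columns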
----------------------------------------------------------------------
-- Constructor: there are a couple of extra instance fields to set up.
----------------------------------------------------------------------
function pylex:newstream (src)
   print "Opening pythonic lexer stream"
   local s = super.newstream (self, src)
   s.indent_levels = {0}
   s.pending_ends  = 0
   return s
end

----------------------------------------------------------------------
-- Now, about syntax tuning: we remove some redundant keywords, since
-- they will overlap with ":" indenters:
--  * the "then" in if statements;
--  * the "do" in for and while loops.
-- We do a little bit of extra checking, just in case some other
-- extension has already hacked the parsers we're about to fiddle with.
--
-- In a real extension, you'd rather make these keywords optional
-- instead of just deleting them, but here I try to avoid unnecessary
-- clutter.
----------------------------------------------------------------------
local x_then_y = mlp.stat:get("if")[2].primary
assert (table.remove (x_then_y, 2) == "then",
        "Not the regular if/then/else parser?!")

local for_parser = mlp.stat:get("for")
assert (table.remove (for_parser, 3) == "do",
        "Not the regular for/do parser?!")

local while_parser = mlp.stat:get("while")
assert (table.remove (while_parser, 3) == "do",
        "Not the regular while/do parser?!")
----------------------------------------------------------------------
-- Finally, change the lexer used by the compiler!
----------------------------------------------------------------------
mlp.lexer = pylex