1*16467b97STreehugger Robot"""ANTLR3 runtime package""" 2*16467b97STreehugger Robot 3*16467b97STreehugger Robot# begin[licence] 4*16467b97STreehugger Robot# 5*16467b97STreehugger Robot# [The "BSD licence"] 6*16467b97STreehugger Robot# Copyright (c) 2005-2008 Terence Parr 7*16467b97STreehugger Robot# All rights reserved. 8*16467b97STreehugger Robot# 9*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without 10*16467b97STreehugger Robot# modification, are permitted provided that the following conditions 11*16467b97STreehugger Robot# are met: 12*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright 13*16467b97STreehugger Robot# notice, this list of conditions and the following disclaimer. 14*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright 15*16467b97STreehugger Robot# notice, this list of conditions and the following disclaimer in the 16*16467b97STreehugger Robot# documentation and/or other materials provided with the distribution. 17*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products 18*16467b97STreehugger Robot# derived from this software without specific prior written permission. 19*16467b97STreehugger Robot# 20*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30*16467b97STreehugger Robot# 31*16467b97STreehugger Robot# end[licence] 32*16467b97STreehugger Robot 33*16467b97STreehugger Robotimport codecs 34*16467b97STreehugger Robotfrom StringIO import StringIO 35*16467b97STreehugger Robot 36*16467b97STreehugger Robotfrom antlr3.constants import DEFAULT_CHANNEL, EOF 37*16467b97STreehugger Robotfrom antlr3.tokens import Token, CommonToken 38*16467b97STreehugger Robot 39*16467b97STreehugger Robot 40*16467b97STreehugger Robot############################################################################ 41*16467b97STreehugger Robot# 42*16467b97STreehugger Robot# basic interfaces 43*16467b97STreehugger Robot# IntStream 44*16467b97STreehugger Robot# +- CharStream 45*16467b97STreehugger Robot# \- TokenStream 46*16467b97STreehugger Robot# 47*16467b97STreehugger Robot# subclasses must implemented all methods 48*16467b97STreehugger Robot# 49*16467b97STreehugger Robot############################################################################ 50*16467b97STreehugger Robot 51*16467b97STreehugger Robotclass IntStream(object): 52*16467b97STreehugger Robot """ 53*16467b97STreehugger Robot @brief Base interface for streams of integer values. 54*16467b97STreehugger Robot 55*16467b97STreehugger Robot A simple stream of integers used when all I care about is the char 56*16467b97STreehugger Robot or token type sequence (such as interpretation). 57*16467b97STreehugger Robot """ 58*16467b97STreehugger Robot 59*16467b97STreehugger Robot def consume(self): 60*16467b97STreehugger Robot raise NotImplementedError 61*16467b97STreehugger Robot 62*16467b97STreehugger Robot 63*16467b97STreehugger Robot def LA(self, i): 64*16467b97STreehugger Robot """Get int at current input pointer + i ahead where i=1 is next int. 65*16467b97STreehugger Robot 66*16467b97STreehugger Robot Negative indexes are allowed. LA(-1) is previous token (token 67*16467b97STreehugger Robot just matched). LA(-i) where i is before first token should 68*16467b97STreehugger Robot yield -1, invalid char / EOF. 69*16467b97STreehugger Robot """ 70*16467b97STreehugger Robot 71*16467b97STreehugger Robot raise NotImplementedError 72*16467b97STreehugger Robot 73*16467b97STreehugger Robot 74*16467b97STreehugger Robot def mark(self): 75*16467b97STreehugger Robot """ 76*16467b97STreehugger Robot Tell the stream to start buffering if it hasn't already. Return 77*16467b97STreehugger Robot current input position, index(), or some other marker so that 78*16467b97STreehugger Robot when passed to rewind() you get back to the same spot. 79*16467b97STreehugger Robot rewind(mark()) should not affect the input cursor. The Lexer 80*16467b97STreehugger Robot track line/col info as well as input index so its markers are 81*16467b97STreehugger Robot not pure input indexes. Same for tree node streams. 82*16467b97STreehugger Robot """ 83*16467b97STreehugger Robot 84*16467b97STreehugger Robot raise NotImplementedError 85*16467b97STreehugger Robot 86*16467b97STreehugger Robot 87*16467b97STreehugger Robot def index(self): 88*16467b97STreehugger Robot """ 89*16467b97STreehugger Robot Return the current input symbol index 0..n where n indicates the 90*16467b97STreehugger Robot last symbol has been read. The index is the symbol about to be 91*16467b97STreehugger Robot read not the most recently read symbol. 92*16467b97STreehugger Robot """ 93*16467b97STreehugger Robot 94*16467b97STreehugger Robot raise NotImplementedError 95*16467b97STreehugger Robot 96*16467b97STreehugger Robot 97*16467b97STreehugger Robot def rewind(self, marker=None): 98*16467b97STreehugger Robot """ 99*16467b97STreehugger Robot Reset the stream so that next call to index would return marker. 100*16467b97STreehugger Robot The marker will usually be index() but it doesn't have to be. It's 101*16467b97STreehugger Robot just a marker to indicate what state the stream was in. This is 102*16467b97STreehugger Robot essentially calling release() and seek(). If there are markers 103*16467b97STreehugger Robot created after this marker argument, this routine must unroll them 104*16467b97STreehugger Robot like a stack. Assume the state the stream was in when this marker 105*16467b97STreehugger Robot was created. 106*16467b97STreehugger Robot 107*16467b97STreehugger Robot If marker is None: 108*16467b97STreehugger Robot Rewind to the input position of the last marker. 109*16467b97STreehugger Robot Used currently only after a cyclic DFA and just 110*16467b97STreehugger Robot before starting a sem/syn predicate to get the 111*16467b97STreehugger Robot input position back to the start of the decision. 112*16467b97STreehugger Robot Do not "pop" the marker off the state. mark(i) 113*16467b97STreehugger Robot and rewind(i) should balance still. It is 114*16467b97STreehugger Robot like invoking rewind(last marker) but it should not "pop" 115*16467b97STreehugger Robot the marker off. It's like seek(last marker's input position). 116*16467b97STreehugger Robot """ 117*16467b97STreehugger Robot 118*16467b97STreehugger Robot raise NotImplementedError 119*16467b97STreehugger Robot 120*16467b97STreehugger Robot 121*16467b97STreehugger Robot def release(self, marker=None): 122*16467b97STreehugger Robot """ 123*16467b97STreehugger Robot You may want to commit to a backtrack but don't want to force the 124*16467b97STreehugger Robot stream to keep bookkeeping objects around for a marker that is 125*16467b97STreehugger Robot no longer necessary. This will have the same behavior as 126*16467b97STreehugger Robot rewind() except it releases resources without the backward seek. 127*16467b97STreehugger Robot This must throw away resources for all markers back to the marker 128*16467b97STreehugger Robot argument. So if you're nested 5 levels of mark(), and then release(2) 129*16467b97STreehugger Robot you have to release resources for depths 2..5. 130*16467b97STreehugger Robot """ 131*16467b97STreehugger Robot 132*16467b97STreehugger Robot raise NotImplementedError 133*16467b97STreehugger Robot 134*16467b97STreehugger Robot 135*16467b97STreehugger Robot def seek(self, index): 136*16467b97STreehugger Robot """ 137*16467b97STreehugger Robot Set the input cursor to the position indicated by index. This is 138*16467b97STreehugger Robot normally used to seek ahead in the input stream. No buffering is 139*16467b97STreehugger Robot required to do this unless you know your stream will use seek to 140*16467b97STreehugger Robot move backwards such as when backtracking. 141*16467b97STreehugger Robot 142*16467b97STreehugger Robot This is different from rewind in its multi-directional 143*16467b97STreehugger Robot requirement and in that its argument is strictly an input cursor 144*16467b97STreehugger Robot (index). 145*16467b97STreehugger Robot 146*16467b97STreehugger Robot For char streams, seeking forward must update the stream state such 147*16467b97STreehugger Robot as line number. For seeking backwards, you will be presumably 148*16467b97STreehugger Robot backtracking using the mark/rewind mechanism that restores state and 149*16467b97STreehugger Robot so this method does not need to update state when seeking backwards. 150*16467b97STreehugger Robot 151*16467b97STreehugger Robot Currently, this method is only used for efficient backtracking using 152*16467b97STreehugger Robot memoization, but in the future it may be used for incremental parsing. 153*16467b97STreehugger Robot 154*16467b97STreehugger Robot The index is 0..n-1. A seek to position i means that LA(1) will 155*16467b97STreehugger Robot return the ith symbol. So, seeking to 0 means LA(1) will return the 156*16467b97STreehugger Robot first element in the stream. 157*16467b97STreehugger Robot """ 158*16467b97STreehugger Robot 159*16467b97STreehugger Robot raise NotImplementedError 160*16467b97STreehugger Robot 161*16467b97STreehugger Robot 162*16467b97STreehugger Robot def size(self): 163*16467b97STreehugger Robot """ 164*16467b97STreehugger Robot Only makes sense for streams that buffer everything up probably, but 165*16467b97STreehugger Robot might be useful to display the entire stream or for testing. This 166*16467b97STreehugger Robot value includes a single EOF. 167*16467b97STreehugger Robot """ 168*16467b97STreehugger Robot 169*16467b97STreehugger Robot raise NotImplementedError 170*16467b97STreehugger Robot 171*16467b97STreehugger Robot 172*16467b97STreehugger Robot def getSourceName(self): 173*16467b97STreehugger Robot """ 174*16467b97STreehugger Robot Where are you getting symbols from? Normally, implementations will 175*16467b97STreehugger Robot pass the buck all the way to the lexer who can ask its input stream 176*16467b97STreehugger Robot for the file name or whatever. 177*16467b97STreehugger Robot """ 178*16467b97STreehugger Robot 179*16467b97STreehugger Robot raise NotImplementedError 180*16467b97STreehugger Robot 181*16467b97STreehugger Robot 182*16467b97STreehugger Robotclass CharStream(IntStream): 183*16467b97STreehugger Robot """ 184*16467b97STreehugger Robot @brief A source of characters for an ANTLR lexer. 185*16467b97STreehugger Robot 186*16467b97STreehugger Robot This is an abstract class that must be implemented by a subclass. 187*16467b97STreehugger Robot 188*16467b97STreehugger Robot """ 189*16467b97STreehugger Robot 190*16467b97STreehugger Robot # pylint does not realize that this is an interface, too 191*16467b97STreehugger Robot #pylint: disable-msg=W0223 192*16467b97STreehugger Robot 193*16467b97STreehugger Robot EOF = -1 194*16467b97STreehugger Robot 195*16467b97STreehugger Robot 196*16467b97STreehugger Robot def substring(self, start, stop): 197*16467b97STreehugger Robot """ 198*16467b97STreehugger Robot For infinite streams, you don't need this; primarily I'm providing 199*16467b97STreehugger Robot a useful interface for action code. Just make sure actions don't 200*16467b97STreehugger Robot use this on streams that don't support it. 201*16467b97STreehugger Robot """ 202*16467b97STreehugger Robot 203*16467b97STreehugger Robot raise NotImplementedError 204*16467b97STreehugger Robot 205*16467b97STreehugger Robot 206*16467b97STreehugger Robot def LT(self, i): 207*16467b97STreehugger Robot """ 208*16467b97STreehugger Robot Get the ith character of lookahead. This is the same usually as 209*16467b97STreehugger Robot LA(i). This will be used for labels in the generated 210*16467b97STreehugger Robot lexer code. I'd prefer to return a char here type-wise, but it's 211*16467b97STreehugger Robot probably better to be 32-bit clean and be consistent with LA. 212*16467b97STreehugger Robot """ 213*16467b97STreehugger Robot 214*16467b97STreehugger Robot raise NotImplementedError 215*16467b97STreehugger Robot 216*16467b97STreehugger Robot 217*16467b97STreehugger Robot def getLine(self): 218*16467b97STreehugger Robot """ANTLR tracks the line information automatically""" 219*16467b97STreehugger Robot 220*16467b97STreehugger Robot raise NotImplementedError 221*16467b97STreehugger Robot 222*16467b97STreehugger Robot 223*16467b97STreehugger Robot def setLine(self, line): 224*16467b97STreehugger Robot """ 225*16467b97STreehugger Robot Because this stream can rewind, we need to be able to reset the line 226*16467b97STreehugger Robot """ 227*16467b97STreehugger Robot 228*16467b97STreehugger Robot raise NotImplementedError 229*16467b97STreehugger Robot 230*16467b97STreehugger Robot 231*16467b97STreehugger Robot def getCharPositionInLine(self): 232*16467b97STreehugger Robot """ 233*16467b97STreehugger Robot The index of the character relative to the beginning of the line 0..n-1 234*16467b97STreehugger Robot """ 235*16467b97STreehugger Robot 236*16467b97STreehugger Robot raise NotImplementedError 237*16467b97STreehugger Robot 238*16467b97STreehugger Robot 239*16467b97STreehugger Robot def setCharPositionInLine(self, pos): 240*16467b97STreehugger Robot raise NotImplementedError 241*16467b97STreehugger Robot 242*16467b97STreehugger Robot 243*16467b97STreehugger Robotclass TokenStream(IntStream): 244*16467b97STreehugger Robot """ 245*16467b97STreehugger Robot 246*16467b97STreehugger Robot @brief A stream of tokens accessing tokens from a TokenSource 247*16467b97STreehugger Robot 248*16467b97STreehugger Robot This is an abstract class that must be implemented by a subclass. 249*16467b97STreehugger Robot 250*16467b97STreehugger Robot """ 251*16467b97STreehugger Robot 252*16467b97STreehugger Robot # pylint does not realize that this is an interface, too 253*16467b97STreehugger Robot #pylint: disable-msg=W0223 254*16467b97STreehugger Robot 255*16467b97STreehugger Robot def LT(self, k): 256*16467b97STreehugger Robot """ 257*16467b97STreehugger Robot Get Token at current input pointer + i ahead where i=1 is next Token. 258*16467b97STreehugger Robot i<0 indicates tokens in the past. So -1 is previous token and -2 is 259*16467b97STreehugger Robot two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. 260*16467b97STreehugger Robot Return null for LT(0) and any index that results in an absolute address 261*16467b97STreehugger Robot that is negative. 262*16467b97STreehugger Robot """ 263*16467b97STreehugger Robot 264*16467b97STreehugger Robot raise NotImplementedError 265*16467b97STreehugger Robot 266*16467b97STreehugger Robot 267*16467b97STreehugger Robot def range(self): 268*16467b97STreehugger Robot """ 269*16467b97STreehugger Robot How far ahead has the stream been asked to look? The return 270*16467b97STreehugger Robot value is a valid index from 0..n-1. 271*16467b97STreehugger Robot """ 272*16467b97STreehugger Robot 273*16467b97STreehugger Robot raise NotImplementedError 274*16467b97STreehugger Robot 275*16467b97STreehugger Robot 276*16467b97STreehugger Robot def get(self, i): 277*16467b97STreehugger Robot """ 278*16467b97STreehugger Robot Get a token at an absolute index i; 0..n-1. This is really only 279*16467b97STreehugger Robot needed for profiling and debugging and token stream rewriting. 280*16467b97STreehugger Robot If you don't want to buffer up tokens, then this method makes no 281*16467b97STreehugger Robot sense for you. Naturally you can't use the rewrite stream feature. 282*16467b97STreehugger Robot I believe DebugTokenStream can easily be altered to not use 283*16467b97STreehugger Robot this method, removing the dependency. 284*16467b97STreehugger Robot """ 285*16467b97STreehugger Robot 286*16467b97STreehugger Robot raise NotImplementedError 287*16467b97STreehugger Robot 288*16467b97STreehugger Robot 289*16467b97STreehugger Robot def getTokenSource(self): 290*16467b97STreehugger Robot """ 291*16467b97STreehugger Robot Where is this stream pulling tokens from? This is not the name, but 292*16467b97STreehugger Robot the object that provides Token objects. 293*16467b97STreehugger Robot """ 294*16467b97STreehugger Robot 295*16467b97STreehugger Robot raise NotImplementedError 296*16467b97STreehugger Robot 297*16467b97STreehugger Robot 298*16467b97STreehugger Robot def toString(self, start=None, stop=None): 299*16467b97STreehugger Robot """ 300*16467b97STreehugger Robot Return the text of all tokens from start to stop, inclusive. 301*16467b97STreehugger Robot If the stream does not buffer all the tokens then it can just 302*16467b97STreehugger Robot return "" or null; Users should not access $ruleLabel.text in 303*16467b97STreehugger Robot an action of course in that case. 304*16467b97STreehugger Robot 305*16467b97STreehugger Robot Because the user is not required to use a token with an index stored 306*16467b97STreehugger Robot in it, we must provide a means for two token objects themselves to 307*16467b97STreehugger Robot indicate the start/end location. Most often this will just delegate 308*16467b97STreehugger Robot to the other toString(int,int). This is also parallel with 309*16467b97STreehugger Robot the TreeNodeStream.toString(Object,Object). 310*16467b97STreehugger Robot """ 311*16467b97STreehugger Robot 312*16467b97STreehugger Robot raise NotImplementedError 313*16467b97STreehugger Robot 314*16467b97STreehugger Robot 315*16467b97STreehugger Robot############################################################################ 316*16467b97STreehugger Robot# 317*16467b97STreehugger Robot# character streams for use in lexers 318*16467b97STreehugger Robot# CharStream 319*16467b97STreehugger Robot# \- ANTLRStringStream 320*16467b97STreehugger Robot# 321*16467b97STreehugger Robot############################################################################ 322*16467b97STreehugger Robot 323*16467b97STreehugger Robot 324*16467b97STreehugger Robotclass ANTLRStringStream(CharStream): 325*16467b97STreehugger Robot """ 326*16467b97STreehugger Robot @brief CharStream that pull data from a unicode string. 327*16467b97STreehugger Robot 328*16467b97STreehugger Robot A pretty quick CharStream that pulls all data from an array 329*16467b97STreehugger Robot directly. Every method call counts in the lexer. 330*16467b97STreehugger Robot 331*16467b97STreehugger Robot """ 332*16467b97STreehugger Robot 333*16467b97STreehugger Robot 334*16467b97STreehugger Robot def __init__(self, data): 335*16467b97STreehugger Robot """ 336*16467b97STreehugger Robot @param data This should be a unicode string holding the data you want 337*16467b97STreehugger Robot to parse. If you pass in a byte string, the Lexer will choke on 338*16467b97STreehugger Robot non-ascii data. 339*16467b97STreehugger Robot 340*16467b97STreehugger Robot """ 341*16467b97STreehugger Robot 342*16467b97STreehugger Robot CharStream.__init__(self) 343*16467b97STreehugger Robot 344*16467b97STreehugger Robot # The data being scanned 345*16467b97STreehugger Robot self.strdata = unicode(data) 346*16467b97STreehugger Robot self.data = [ord(c) for c in self.strdata] 347*16467b97STreehugger Robot 348*16467b97STreehugger Robot # How many characters are actually in the buffer 349*16467b97STreehugger Robot self.n = len(data) 350*16467b97STreehugger Robot 351*16467b97STreehugger Robot # 0..n-1 index into string of next char 352*16467b97STreehugger Robot self.p = 0 353*16467b97STreehugger Robot 354*16467b97STreehugger Robot # line number 1..n within the input 355*16467b97STreehugger Robot self.line = 1 356*16467b97STreehugger Robot 357*16467b97STreehugger Robot # The index of the character relative to the beginning of the 358*16467b97STreehugger Robot # line 0..n-1 359*16467b97STreehugger Robot self.charPositionInLine = 0 360*16467b97STreehugger Robot 361*16467b97STreehugger Robot # A list of CharStreamState objects that tracks the stream state 362*16467b97STreehugger Robot # values line, charPositionInLine, and p that can change as you 363*16467b97STreehugger Robot # move through the input stream. Indexed from 0..markDepth-1. 364*16467b97STreehugger Robot self._markers = [ ] 365*16467b97STreehugger Robot self.lastMarker = None 366*16467b97STreehugger Robot self.markDepth = 0 367*16467b97STreehugger Robot 368*16467b97STreehugger Robot # What is name or source of this char stream? 369*16467b97STreehugger Robot self.name = None 370*16467b97STreehugger Robot 371*16467b97STreehugger Robot 372*16467b97STreehugger Robot def reset(self): 373*16467b97STreehugger Robot """ 374*16467b97STreehugger Robot Reset the stream so that it's in the same state it was 375*16467b97STreehugger Robot when the object was created *except* the data array is not 376*16467b97STreehugger Robot touched. 377*16467b97STreehugger Robot """ 378*16467b97STreehugger Robot 379*16467b97STreehugger Robot self.p = 0 380*16467b97STreehugger Robot self.line = 1 381*16467b97STreehugger Robot self.charPositionInLine = 0 382*16467b97STreehugger Robot self._markers = [ ] 383*16467b97STreehugger Robot 384*16467b97STreehugger Robot 385*16467b97STreehugger Robot def consume(self): 386*16467b97STreehugger Robot try: 387*16467b97STreehugger Robot if self.data[self.p] == 10: # \n 388*16467b97STreehugger Robot self.line += 1 389*16467b97STreehugger Robot self.charPositionInLine = 0 390*16467b97STreehugger Robot else: 391*16467b97STreehugger Robot self.charPositionInLine += 1 392*16467b97STreehugger Robot 393*16467b97STreehugger Robot self.p += 1 394*16467b97STreehugger Robot 395*16467b97STreehugger Robot except IndexError: 396*16467b97STreehugger Robot # happend when we reached EOF and self.data[self.p] fails 397*16467b97STreehugger Robot # just do nothing 398*16467b97STreehugger Robot pass 399*16467b97STreehugger Robot 400*16467b97STreehugger Robot 401*16467b97STreehugger Robot 402*16467b97STreehugger Robot def LA(self, i): 403*16467b97STreehugger Robot if i == 0: 404*16467b97STreehugger Robot return 0 # undefined 405*16467b97STreehugger Robot 406*16467b97STreehugger Robot if i < 0: 407*16467b97STreehugger Robot i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1] 408*16467b97STreehugger Robot 409*16467b97STreehugger Robot try: 410*16467b97STreehugger Robot return self.data[self.p+i-1] 411*16467b97STreehugger Robot except IndexError: 412*16467b97STreehugger Robot return EOF 413*16467b97STreehugger Robot 414*16467b97STreehugger Robot 415*16467b97STreehugger Robot 416*16467b97STreehugger Robot def LT(self, i): 417*16467b97STreehugger Robot if i == 0: 418*16467b97STreehugger Robot return 0 # undefined 419*16467b97STreehugger Robot 420*16467b97STreehugger Robot if i < 0: 421*16467b97STreehugger Robot i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1] 422*16467b97STreehugger Robot 423*16467b97STreehugger Robot try: 424*16467b97STreehugger Robot return self.strdata[self.p+i-1] 425*16467b97STreehugger Robot except IndexError: 426*16467b97STreehugger Robot return EOF 427*16467b97STreehugger Robot 428*16467b97STreehugger Robot 429*16467b97STreehugger Robot def index(self): 430*16467b97STreehugger Robot """ 431*16467b97STreehugger Robot Return the current input symbol index 0..n where n indicates the 432*16467b97STreehugger Robot last symbol has been read. The index is the index of char to 433*16467b97STreehugger Robot be returned from LA(1). 434*16467b97STreehugger Robot """ 435*16467b97STreehugger Robot 436*16467b97STreehugger Robot return self.p 437*16467b97STreehugger Robot 438*16467b97STreehugger Robot 439*16467b97STreehugger Robot def size(self): 440*16467b97STreehugger Robot return self.n 441*16467b97STreehugger Robot 442*16467b97STreehugger Robot 443*16467b97STreehugger Robot def mark(self): 444*16467b97STreehugger Robot state = (self.p, self.line, self.charPositionInLine) 445*16467b97STreehugger Robot try: 446*16467b97STreehugger Robot self._markers[self.markDepth] = state 447*16467b97STreehugger Robot except IndexError: 448*16467b97STreehugger Robot self._markers.append(state) 449*16467b97STreehugger Robot self.markDepth += 1 450*16467b97STreehugger Robot 451*16467b97STreehugger Robot self.lastMarker = self.markDepth 452*16467b97STreehugger Robot 453*16467b97STreehugger Robot return self.lastMarker 454*16467b97STreehugger Robot 455*16467b97STreehugger Robot 456*16467b97STreehugger Robot def rewind(self, marker=None): 457*16467b97STreehugger Robot if marker is None: 458*16467b97STreehugger Robot marker = self.lastMarker 459*16467b97STreehugger Robot 460*16467b97STreehugger Robot p, line, charPositionInLine = self._markers[marker-1] 461*16467b97STreehugger Robot 462*16467b97STreehugger Robot self.seek(p) 463*16467b97STreehugger Robot self.line = line 464*16467b97STreehugger Robot self.charPositionInLine = charPositionInLine 465*16467b97STreehugger Robot self.release(marker) 466*16467b97STreehugger Robot 467*16467b97STreehugger Robot 468*16467b97STreehugger Robot def release(self, marker=None): 469*16467b97STreehugger Robot if marker is None: 470*16467b97STreehugger Robot marker = self.lastMarker 471*16467b97STreehugger Robot 472*16467b97STreehugger Robot self.markDepth = marker-1 473*16467b97STreehugger Robot 474*16467b97STreehugger Robot 475*16467b97STreehugger Robot def seek(self, index): 476*16467b97STreehugger Robot """ 477*16467b97STreehugger Robot consume() ahead until p==index; can't just set p=index as we must 478*16467b97STreehugger Robot update line and charPositionInLine. 479*16467b97STreehugger Robot """ 480*16467b97STreehugger Robot 481*16467b97STreehugger Robot if index <= self.p: 482*16467b97STreehugger Robot self.p = index # just jump; don't update stream state (line, ...) 483*16467b97STreehugger Robot return 484*16467b97STreehugger Robot 485*16467b97STreehugger Robot # seek forward, consume until p hits index 486*16467b97STreehugger Robot while self.p < index: 487*16467b97STreehugger Robot self.consume() 488*16467b97STreehugger Robot 489*16467b97STreehugger Robot 490*16467b97STreehugger Robot def substring(self, start, stop): 491*16467b97STreehugger Robot return self.strdata[start:stop+1] 492*16467b97STreehugger Robot 493*16467b97STreehugger Robot 494*16467b97STreehugger Robot def getLine(self): 495*16467b97STreehugger Robot """Using setter/getter methods is deprecated. Use o.line instead.""" 496*16467b97STreehugger Robot return self.line 497*16467b97STreehugger Robot 498*16467b97STreehugger Robot 499*16467b97STreehugger Robot def getCharPositionInLine(self): 500*16467b97STreehugger Robot """ 501*16467b97STreehugger Robot Using setter/getter methods is deprecated. Use o.charPositionInLine 502*16467b97STreehugger Robot instead. 503*16467b97STreehugger Robot """ 504*16467b97STreehugger Robot return self.charPositionInLine 505*16467b97STreehugger Robot 506*16467b97STreehugger Robot 507*16467b97STreehugger Robot def setLine(self, line): 508*16467b97STreehugger Robot """Using setter/getter methods is deprecated. Use o.line instead.""" 509*16467b97STreehugger Robot self.line = line 510*16467b97STreehugger Robot 511*16467b97STreehugger Robot 512*16467b97STreehugger Robot def setCharPositionInLine(self, pos): 513*16467b97STreehugger Robot """ 514*16467b97STreehugger Robot Using setter/getter methods is deprecated. Use o.charPositionInLine 515*16467b97STreehugger Robot instead. 516*16467b97STreehugger Robot """ 517*16467b97STreehugger Robot self.charPositionInLine = pos 518*16467b97STreehugger Robot 519*16467b97STreehugger Robot 520*16467b97STreehugger Robot def getSourceName(self): 521*16467b97STreehugger Robot return self.name 522*16467b97STreehugger Robot 523*16467b97STreehugger Robot 524*16467b97STreehugger Robotclass ANTLRFileStream(ANTLRStringStream): 525*16467b97STreehugger Robot """ 526*16467b97STreehugger Robot @brief CharStream that opens a file to read the data. 527*16467b97STreehugger Robot 528*16467b97STreehugger Robot This is a char buffer stream that is loaded from a file 529*16467b97STreehugger Robot all at once when you construct the object. 530*16467b97STreehugger Robot """ 531*16467b97STreehugger Robot 532*16467b97STreehugger Robot def __init__(self, fileName, encoding=None): 533*16467b97STreehugger Robot """ 534*16467b97STreehugger Robot @param fileName The path to the file to be opened. The file will be 535*16467b97STreehugger Robot opened with mode 'rb'. 536*16467b97STreehugger Robot 537*16467b97STreehugger Robot @param encoding If you set the optional encoding argument, then the 538*16467b97STreehugger Robot data will be decoded on the fly. 539*16467b97STreehugger Robot 540*16467b97STreehugger Robot """ 541*16467b97STreehugger Robot 542*16467b97STreehugger Robot self.fileName = fileName 543*16467b97STreehugger Robot 544*16467b97STreehugger Robot fp = codecs.open(fileName, 'rb', encoding) 545*16467b97STreehugger Robot try: 546*16467b97STreehugger Robot data = fp.read() 547*16467b97STreehugger Robot finally: 548*16467b97STreehugger Robot fp.close() 549*16467b97STreehugger Robot 550*16467b97STreehugger Robot ANTLRStringStream.__init__(self, data) 551*16467b97STreehugger Robot 552*16467b97STreehugger Robot 553*16467b97STreehugger Robot def getSourceName(self): 554*16467b97STreehugger Robot """Deprecated, access o.fileName directly.""" 555*16467b97STreehugger Robot 556*16467b97STreehugger Robot return self.fileName 557*16467b97STreehugger Robot 558*16467b97STreehugger Robot 559*16467b97STreehugger Robotclass ANTLRInputStream(ANTLRStringStream): 560*16467b97STreehugger Robot """ 561*16467b97STreehugger Robot @brief CharStream that reads data from a file-like object. 562*16467b97STreehugger Robot 563*16467b97STreehugger Robot This is a char buffer stream that is loaded from a file like object 564*16467b97STreehugger Robot all at once when you construct the object. 565*16467b97STreehugger Robot 566*16467b97STreehugger Robot All input is consumed from the file, but it is not closed. 567*16467b97STreehugger Robot """ 568*16467b97STreehugger Robot 569*16467b97STreehugger Robot def __init__(self, file, encoding=None): 570*16467b97STreehugger Robot """ 571*16467b97STreehugger Robot @param file A file-like object holding your input. Only the read() 572*16467b97STreehugger Robot method must be implemented. 573*16467b97STreehugger Robot 574*16467b97STreehugger Robot @param encoding If you set the optional encoding argument, then the 575*16467b97STreehugger Robot data will be decoded on the fly. 576*16467b97STreehugger Robot 577*16467b97STreehugger Robot """ 578*16467b97STreehugger Robot 579*16467b97STreehugger Robot if encoding is not None: 580*16467b97STreehugger Robot # wrap input in a decoding reader 581*16467b97STreehugger Robot reader = codecs.lookup(encoding)[2] 582*16467b97STreehugger Robot file = reader(file) 583*16467b97STreehugger Robot 584*16467b97STreehugger Robot data = file.read() 585*16467b97STreehugger Robot 586*16467b97STreehugger Robot ANTLRStringStream.__init__(self, data) 587*16467b97STreehugger Robot 588*16467b97STreehugger Robot 589*16467b97STreehugger Robot# I guess the ANTLR prefix exists only to avoid a name clash with some Java 590*16467b97STreehugger Robot# mumbojumbo. A plain "StringStream" looks better to me, which should be 591*16467b97STreehugger Robot# the preferred name in Python. 592*16467b97STreehugger RobotStringStream = ANTLRStringStream 593*16467b97STreehugger RobotFileStream = ANTLRFileStream 594*16467b97STreehugger RobotInputStream = ANTLRInputStream 595*16467b97STreehugger Robot 596*16467b97STreehugger Robot 597*16467b97STreehugger Robot############################################################################ 598*16467b97STreehugger Robot# 599*16467b97STreehugger Robot# Token streams 600*16467b97STreehugger Robot# TokenStream 601*16467b97STreehugger Robot# +- CommonTokenStream 602*16467b97STreehugger Robot# \- TokenRewriteStream 603*16467b97STreehugger Robot# 604*16467b97STreehugger Robot############################################################################ 605*16467b97STreehugger Robot 606*16467b97STreehugger Robot 607*16467b97STreehugger Robotclass CommonTokenStream(TokenStream): 608*16467b97STreehugger Robot """ 609*16467b97STreehugger Robot @brief The most common stream of tokens 610*16467b97STreehugger Robot 611*16467b97STreehugger Robot The most common stream of tokens is one where every token is buffered up 612*16467b97STreehugger Robot and tokens are prefiltered for a certain channel (the parser will only 613*16467b97STreehugger Robot see these tokens and cannot change the filter channel number during the 614*16467b97STreehugger Robot parse). 615*16467b97STreehugger Robot """ 616*16467b97STreehugger Robot 617*16467b97STreehugger Robot def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): 618*16467b97STreehugger Robot """ 619*16467b97STreehugger Robot @param tokenSource A TokenSource instance (usually a Lexer) to pull 620*16467b97STreehugger Robot the tokens from. 621*16467b97STreehugger Robot 622*16467b97STreehugger Robot @param channel Skip tokens on any channel but this one; this is how we 623*16467b97STreehugger Robot skip whitespace... 624*16467b97STreehugger Robot 625*16467b97STreehugger Robot """ 626*16467b97STreehugger Robot 627*16467b97STreehugger Robot TokenStream.__init__(self) 628*16467b97STreehugger Robot 629*16467b97STreehugger Robot self.tokenSource = tokenSource 630*16467b97STreehugger Robot 631*16467b97STreehugger Robot # Record every single token pulled from the source so we can reproduce 632*16467b97STreehugger Robot # chunks of it later. 633*16467b97STreehugger Robot self.tokens = [] 634*16467b97STreehugger Robot 635*16467b97STreehugger Robot # Map<tokentype, channel> to override some Tokens' channel numbers 636*16467b97STreehugger Robot self.channelOverrideMap = {} 637*16467b97STreehugger Robot 638*16467b97STreehugger Robot # Set<tokentype>; discard any tokens with this type 639*16467b97STreehugger Robot self.discardSet = set() 640*16467b97STreehugger Robot 641*16467b97STreehugger Robot # Skip tokens on any channel but this one; this is how we skip 642*16467b97STreehugger Robot # whitespace... 643*16467b97STreehugger Robot self.channel = channel 644*16467b97STreehugger Robot 645*16467b97STreehugger Robot # By default, track all incoming tokens 646*16467b97STreehugger Robot self.discardOffChannelTokens = False 647*16467b97STreehugger Robot 648*16467b97STreehugger Robot # The index into the tokens list of the current token (next token 649*16467b97STreehugger Robot # to consume). p==-1 indicates that the tokens list is empty 650*16467b97STreehugger Robot self.p = -1 651*16467b97STreehugger Robot 652*16467b97STreehugger Robot # Remember last marked position 653*16467b97STreehugger Robot self.lastMarker = None 654*16467b97STreehugger Robot 655*16467b97STreehugger Robot # how deep have we gone? 656*16467b97STreehugger Robot self._range = -1 657*16467b97STreehugger Robot 658*16467b97STreehugger Robot 659*16467b97STreehugger Robot def makeEOFToken(self): 660*16467b97STreehugger Robot return self.tokenSource.makeEOFToken() 661*16467b97STreehugger Robot 662*16467b97STreehugger Robot 663*16467b97STreehugger Robot def setTokenSource(self, tokenSource): 664*16467b97STreehugger Robot """Reset this token stream by setting its token source.""" 665*16467b97STreehugger Robot 666*16467b97STreehugger Robot self.tokenSource = tokenSource 667*16467b97STreehugger Robot self.tokens = [] 668*16467b97STreehugger Robot self.p = -1 669*16467b97STreehugger Robot self.channel = DEFAULT_CHANNEL 670*16467b97STreehugger Robot 671*16467b97STreehugger Robot 672*16467b97STreehugger Robot def reset(self): 673*16467b97STreehugger Robot self.p = 0 674*16467b97STreehugger Robot self.lastMarker = None 675*16467b97STreehugger Robot 676*16467b97STreehugger Robot 677*16467b97STreehugger Robot def fillBuffer(self): 678*16467b97STreehugger Robot """ 679*16467b97STreehugger Robot Load all tokens from the token source and put in tokens. 680*16467b97STreehugger Robot This is done upon first LT request because you might want to 681*16467b97STreehugger Robot set some token type / channel overrides before filling buffer. 682*16467b97STreehugger Robot """ 683*16467b97STreehugger Robot 684*16467b97STreehugger Robot 685*16467b97STreehugger Robot index = 0 686*16467b97STreehugger Robot t = self.tokenSource.nextToken() 687*16467b97STreehugger Robot while t is not None and t.type != EOF: 688*16467b97STreehugger Robot discard = False 689*16467b97STreehugger Robot 690*16467b97STreehugger Robot if self.discardSet is not None and t.type in self.discardSet: 691*16467b97STreehugger Robot discard = True 692*16467b97STreehugger Robot 693*16467b97STreehugger Robot elif self.discardOffChannelTokens and t.channel != self.channel: 694*16467b97STreehugger Robot discard = True 695*16467b97STreehugger Robot 696*16467b97STreehugger Robot # is there a channel override for token type? 697*16467b97STreehugger Robot try: 698*16467b97STreehugger Robot overrideChannel = self.channelOverrideMap[t.type] 699*16467b97STreehugger Robot 700*16467b97STreehugger Robot except KeyError: 701*16467b97STreehugger Robot # no override for this type 702*16467b97STreehugger Robot pass 703*16467b97STreehugger Robot 704*16467b97STreehugger Robot else: 705*16467b97STreehugger Robot if overrideChannel == self.channel: 706*16467b97STreehugger Robot t.channel = overrideChannel 707*16467b97STreehugger Robot else: 708*16467b97STreehugger Robot discard = True 709*16467b97STreehugger Robot 710*16467b97STreehugger Robot if not discard: 711*16467b97STreehugger Robot t.index = index 712*16467b97STreehugger Robot self.tokens.append(t) 713*16467b97STreehugger Robot index += 1 714*16467b97STreehugger Robot 715*16467b97STreehugger Robot t = self.tokenSource.nextToken() 716*16467b97STreehugger Robot 717*16467b97STreehugger Robot # leave p pointing at first token on channel 718*16467b97STreehugger Robot self.p = 0 719*16467b97STreehugger Robot self.p = self.skipOffTokenChannels(self.p) 720*16467b97STreehugger Robot 721*16467b97STreehugger Robot 722*16467b97STreehugger Robot def consume(self): 723*16467b97STreehugger Robot """ 724*16467b97STreehugger Robot Move the input pointer to the next incoming token. The stream 725*16467b97STreehugger Robot must become active with LT(1) available. consume() simply 726*16467b97STreehugger Robot moves the input pointer so that LT(1) points at the next 727*16467b97STreehugger Robot input symbol. Consume at least one token. 728*16467b97STreehugger Robot 729*16467b97STreehugger Robot Walk past any token not on the channel the parser is listening to. 730*16467b97STreehugger Robot """ 731*16467b97STreehugger Robot 732*16467b97STreehugger Robot if self.p < len(self.tokens): 733*16467b97STreehugger Robot self.p += 1 734*16467b97STreehugger Robot 735*16467b97STreehugger Robot self.p = self.skipOffTokenChannels(self.p) # leave p on valid token 736*16467b97STreehugger Robot 737*16467b97STreehugger Robot 738*16467b97STreehugger Robot def skipOffTokenChannels(self, i): 739*16467b97STreehugger Robot """ 740*16467b97STreehugger Robot Given a starting index, return the index of the first on-channel 741*16467b97STreehugger Robot token. 742*16467b97STreehugger Robot """ 743*16467b97STreehugger Robot 744*16467b97STreehugger Robot try: 745*16467b97STreehugger Robot while self.tokens[i].channel != self.channel: 746*16467b97STreehugger Robot i += 1 747*16467b97STreehugger Robot except IndexError: 748*16467b97STreehugger Robot # hit the end of token stream 749*16467b97STreehugger Robot pass 750*16467b97STreehugger Robot 751*16467b97STreehugger Robot return i 752*16467b97STreehugger Robot 753*16467b97STreehugger Robot 754*16467b97STreehugger Robot def skipOffTokenChannelsReverse(self, i): 755*16467b97STreehugger Robot while i >= 0 and self.tokens[i].channel != self.channel: 756*16467b97STreehugger Robot i -= 1 757*16467b97STreehugger Robot 758*16467b97STreehugger Robot return i 759*16467b97STreehugger Robot 760*16467b97STreehugger Robot 761*16467b97STreehugger Robot def setTokenTypeChannel(self, ttype, channel): 762*16467b97STreehugger Robot """ 763*16467b97STreehugger Robot A simple filter mechanism whereby you can tell this token stream 764*16467b97STreehugger Robot to force all tokens of type ttype to be on channel. For example, 765*16467b97STreehugger Robot when interpreting, we cannot exec actions so we need to tell 766*16467b97STreehugger Robot the stream to force all WS and NEWLINE to be a different, ignored 767*16467b97STreehugger Robot channel. 768*16467b97STreehugger Robot """ 769*16467b97STreehugger Robot 770*16467b97STreehugger Robot self.channelOverrideMap[ttype] = channel 771*16467b97STreehugger Robot 772*16467b97STreehugger Robot 773*16467b97STreehugger Robot def discardTokenType(self, ttype): 774*16467b97STreehugger Robot self.discardSet.add(ttype) 775*16467b97STreehugger Robot 776*16467b97STreehugger Robot 777*16467b97STreehugger Robot def getTokens(self, start=None, stop=None, types=None): 778*16467b97STreehugger Robot """ 779*16467b97STreehugger Robot Given a start and stop index, return a list of all tokens in 780*16467b97STreehugger Robot the token type set. Return None if no tokens were found. This 781*16467b97STreehugger Robot method looks at both on and off channel tokens. 782*16467b97STreehugger Robot """ 783*16467b97STreehugger Robot 784*16467b97STreehugger Robot if self.p == -1: 785*16467b97STreehugger Robot self.fillBuffer() 786*16467b97STreehugger Robot 787*16467b97STreehugger Robot if stop is None or stop > len(self.tokens): 788*16467b97STreehugger Robot stop = len(self.tokens) 789*16467b97STreehugger Robot 790*16467b97STreehugger Robot if start is None or stop < 0: 791*16467b97STreehugger Robot start = 0 792*16467b97STreehugger Robot 793*16467b97STreehugger Robot if start > stop: 794*16467b97STreehugger Robot return None 795*16467b97STreehugger Robot 796*16467b97STreehugger Robot if isinstance(types, (int, long)): 797*16467b97STreehugger Robot # called with a single type, wrap into set 798*16467b97STreehugger Robot types = set([types]) 799*16467b97STreehugger Robot 800*16467b97STreehugger Robot filteredTokens = [ 801*16467b97STreehugger Robot token for token in self.tokens[start:stop] 802*16467b97STreehugger Robot if types is None or token.type in types 803*16467b97STreehugger Robot ] 804*16467b97STreehugger Robot 805*16467b97STreehugger Robot if len(filteredTokens) == 0: 806*16467b97STreehugger Robot return None 807*16467b97STreehugger Robot 808*16467b97STreehugger Robot return filteredTokens 809*16467b97STreehugger Robot 810*16467b97STreehugger Robot 811*16467b97STreehugger Robot def LT(self, k): 812*16467b97STreehugger Robot """ 813*16467b97STreehugger Robot Get the ith token from the current position 1..n where k=1 is the 814*16467b97STreehugger Robot first symbol of lookahead. 815*16467b97STreehugger Robot """ 816*16467b97STreehugger Robot 817*16467b97STreehugger Robot if self.p == -1: 818*16467b97STreehugger Robot self.fillBuffer() 819*16467b97STreehugger Robot 820*16467b97STreehugger Robot if k == 0: 821*16467b97STreehugger Robot return None 822*16467b97STreehugger Robot 823*16467b97STreehugger Robot if k < 0: 824*16467b97STreehugger Robot return self.LB(-k) 825*16467b97STreehugger Robot 826*16467b97STreehugger Robot i = self.p 827*16467b97STreehugger Robot n = 1 828*16467b97STreehugger Robot # find k good tokens 829*16467b97STreehugger Robot while n < k: 830*16467b97STreehugger Robot # skip off-channel tokens 831*16467b97STreehugger Robot i = self.skipOffTokenChannels(i+1) # leave p on valid token 832*16467b97STreehugger Robot n += 1 833*16467b97STreehugger Robot 834*16467b97STreehugger Robot if i > self._range: 835*16467b97STreehugger Robot self._range = i 836*16467b97STreehugger Robot 837*16467b97STreehugger Robot try: 838*16467b97STreehugger Robot return self.tokens[i] 839*16467b97STreehugger Robot except IndexError: 840*16467b97STreehugger Robot return self.makeEOFToken() 841*16467b97STreehugger Robot 842*16467b97STreehugger Robot 843*16467b97STreehugger Robot def LB(self, k): 844*16467b97STreehugger Robot """Look backwards k tokens on-channel tokens""" 845*16467b97STreehugger Robot 846*16467b97STreehugger Robot if self.p == -1: 847*16467b97STreehugger Robot self.fillBuffer() 848*16467b97STreehugger Robot 849*16467b97STreehugger Robot if k == 0: 850*16467b97STreehugger Robot return None 851*16467b97STreehugger Robot 852*16467b97STreehugger Robot if self.p - k < 0: 853*16467b97STreehugger Robot return None 854*16467b97STreehugger Robot 855*16467b97STreehugger Robot i = self.p 856*16467b97STreehugger Robot n = 1 857*16467b97STreehugger Robot # find k good tokens looking backwards 858*16467b97STreehugger Robot while n <= k: 859*16467b97STreehugger Robot # skip off-channel tokens 860*16467b97STreehugger Robot i = self.skipOffTokenChannelsReverse(i-1) # leave p on valid token 861*16467b97STreehugger Robot n += 1 862*16467b97STreehugger Robot 863*16467b97STreehugger Robot if i < 0: 864*16467b97STreehugger Robot return None 865*16467b97STreehugger Robot 866*16467b97STreehugger Robot return self.tokens[i] 867*16467b97STreehugger Robot 868*16467b97STreehugger Robot 869*16467b97STreehugger Robot def get(self, i): 870*16467b97STreehugger Robot """ 871*16467b97STreehugger Robot Return absolute token i; ignore which channel the tokens are on; 872*16467b97STreehugger Robot that is, count all tokens not just on-channel tokens. 873*16467b97STreehugger Robot """ 874*16467b97STreehugger Robot 875*16467b97STreehugger Robot return self.tokens[i] 876*16467b97STreehugger Robot 877*16467b97STreehugger Robot 878*16467b97STreehugger Robot def slice(self, start, stop): 879*16467b97STreehugger Robot if self.p == -1: 880*16467b97STreehugger Robot self.fillBuffer() 881*16467b97STreehugger Robot 882*16467b97STreehugger Robot if start < 0 or stop < 0: 883*16467b97STreehugger Robot return None 884*16467b97STreehugger Robot 885*16467b97STreehugger Robot return self.tokens[start:stop+1] 886*16467b97STreehugger Robot 887*16467b97STreehugger Robot 888*16467b97STreehugger Robot def LA(self, i): 889*16467b97STreehugger Robot return self.LT(i).type 890*16467b97STreehugger Robot 891*16467b97STreehugger Robot 892*16467b97STreehugger Robot def mark(self): 893*16467b97STreehugger Robot self.lastMarker = self.index() 894*16467b97STreehugger Robot return self.lastMarker 895*16467b97STreehugger Robot 896*16467b97STreehugger Robot 897*16467b97STreehugger Robot def release(self, marker=None): 898*16467b97STreehugger Robot # no resources to release 899*16467b97STreehugger Robot pass 900*16467b97STreehugger Robot 901*16467b97STreehugger Robot 902*16467b97STreehugger Robot def size(self): 903*16467b97STreehugger Robot return len(self.tokens) 904*16467b97STreehugger Robot 905*16467b97STreehugger Robot 906*16467b97STreehugger Robot def range(self): 907*16467b97STreehugger Robot return self._range 908*16467b97STreehugger Robot 909*16467b97STreehugger Robot 910*16467b97STreehugger Robot def index(self): 911*16467b97STreehugger Robot return self.p 912*16467b97STreehugger Robot 913*16467b97STreehugger Robot 914*16467b97STreehugger Robot def rewind(self, marker=None): 915*16467b97STreehugger Robot if marker is None: 916*16467b97STreehugger Robot marker = self.lastMarker 917*16467b97STreehugger Robot 918*16467b97STreehugger Robot self.seek(marker) 919*16467b97STreehugger Robot 920*16467b97STreehugger Robot 921*16467b97STreehugger Robot def seek(self, index): 922*16467b97STreehugger Robot self.p = index 923*16467b97STreehugger Robot 924*16467b97STreehugger Robot 925*16467b97STreehugger Robot def getTokenSource(self): 926*16467b97STreehugger Robot return self.tokenSource 927*16467b97STreehugger Robot 928*16467b97STreehugger Robot 929*16467b97STreehugger Robot def getSourceName(self): 930*16467b97STreehugger Robot return self.tokenSource.getSourceName() 931*16467b97STreehugger Robot 932*16467b97STreehugger Robot 933*16467b97STreehugger Robot def toString(self, start=None, stop=None): 934*16467b97STreehugger Robot if self.p == -1: 935*16467b97STreehugger Robot self.fillBuffer() 936*16467b97STreehugger Robot 937*16467b97STreehugger Robot if start is None: 938*16467b97STreehugger Robot start = 0 939*16467b97STreehugger Robot elif not isinstance(start, int): 940*16467b97STreehugger Robot start = start.index 941*16467b97STreehugger Robot 942*16467b97STreehugger Robot if stop is None: 943*16467b97STreehugger Robot stop = len(self.tokens) - 1 944*16467b97STreehugger Robot elif not isinstance(stop, int): 945*16467b97STreehugger Robot stop = stop.index 946*16467b97STreehugger Robot 947*16467b97STreehugger Robot if stop >= len(self.tokens): 948*16467b97STreehugger Robot stop = len(self.tokens) - 1 949*16467b97STreehugger Robot 950*16467b97STreehugger Robot return ''.join([t.text for t in self.tokens[start:stop+1]]) 951*16467b97STreehugger Robot 952*16467b97STreehugger Robot 953*16467b97STreehugger Robotclass RewriteOperation(object): 954*16467b97STreehugger Robot """@brief Internal helper class.""" 955*16467b97STreehugger Robot 956*16467b97STreehugger Robot def __init__(self, stream, index, text): 957*16467b97STreehugger Robot self.stream = stream 958*16467b97STreehugger Robot 959*16467b97STreehugger Robot # What index into rewrites List are we? 960*16467b97STreehugger Robot self.instructionIndex = None 961*16467b97STreehugger Robot 962*16467b97STreehugger Robot # Token buffer index. 963*16467b97STreehugger Robot self.index = index 964*16467b97STreehugger Robot self.text = text 965*16467b97STreehugger Robot 966*16467b97STreehugger Robot def execute(self, buf): 967*16467b97STreehugger Robot """Execute the rewrite operation by possibly adding to the buffer. 968*16467b97STreehugger Robot Return the index of the next token to operate on. 969*16467b97STreehugger Robot """ 970*16467b97STreehugger Robot 971*16467b97STreehugger Robot return self.index 972*16467b97STreehugger Robot 973*16467b97STreehugger Robot def toString(self): 974*16467b97STreehugger Robot opName = self.__class__.__name__ 975*16467b97STreehugger Robot return '<%s@%d:"%s">' % ( 976*16467b97STreehugger Robot opName, self.index, self.text) 977*16467b97STreehugger Robot 978*16467b97STreehugger Robot __str__ = toString 979*16467b97STreehugger Robot __repr__ = toString 980*16467b97STreehugger Robot 981*16467b97STreehugger Robot 982*16467b97STreehugger Robotclass InsertBeforeOp(RewriteOperation): 983*16467b97STreehugger Robot """@brief Internal helper class.""" 984*16467b97STreehugger Robot 985*16467b97STreehugger Robot def execute(self, buf): 986*16467b97STreehugger Robot buf.write(self.text) 987*16467b97STreehugger Robot if self.stream.tokens[self.index].type != EOF: 988*16467b97STreehugger Robot buf.write(self.stream.tokens[self.index].text) 989*16467b97STreehugger Robot return self.index + 1 990*16467b97STreehugger Robot 991*16467b97STreehugger Robot 992*16467b97STreehugger Robotclass ReplaceOp(RewriteOperation): 993*16467b97STreehugger Robot """ 994*16467b97STreehugger Robot @brief Internal helper class. 995*16467b97STreehugger Robot 996*16467b97STreehugger Robot I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp 997*16467b97STreehugger Robot instructions. 998*16467b97STreehugger Robot """ 999*16467b97STreehugger Robot 1000*16467b97STreehugger Robot def __init__(self, stream, first, last, text): 1001*16467b97STreehugger Robot RewriteOperation.__init__(self, stream, first, text) 1002*16467b97STreehugger Robot self.lastIndex = last 1003*16467b97STreehugger Robot 1004*16467b97STreehugger Robot 1005*16467b97STreehugger Robot def execute(self, buf): 1006*16467b97STreehugger Robot if self.text is not None: 1007*16467b97STreehugger Robot buf.write(self.text) 1008*16467b97STreehugger Robot 1009*16467b97STreehugger Robot return self.lastIndex + 1 1010*16467b97STreehugger Robot 1011*16467b97STreehugger Robot 1012*16467b97STreehugger Robot def toString(self): 1013*16467b97STreehugger Robot if self.text is None: 1014*16467b97STreehugger Robot return '<DeleteOp@%d..%d>' % (self.index, self.lastIndex) 1015*16467b97STreehugger Robot 1016*16467b97STreehugger Robot return '<ReplaceOp@%d..%d:"%s">' % ( 1017*16467b97STreehugger Robot self.index, self.lastIndex, self.text) 1018*16467b97STreehugger Robot 1019*16467b97STreehugger Robot __str__ = toString 1020*16467b97STreehugger Robot __repr__ = toString 1021*16467b97STreehugger Robot 1022*16467b97STreehugger Robot 1023*16467b97STreehugger Robotclass TokenRewriteStream(CommonTokenStream): 1024*16467b97STreehugger Robot """@brief CommonTokenStream that can be modified. 1025*16467b97STreehugger Robot 1026*16467b97STreehugger Robot Useful for dumping out the input stream after doing some 1027*16467b97STreehugger Robot augmentation or other manipulations. 1028*16467b97STreehugger Robot 1029*16467b97STreehugger Robot You can insert stuff, replace, and delete chunks. Note that the 1030*16467b97STreehugger Robot operations are done lazily--only if you convert the buffer to a 1031*16467b97STreehugger Robot String. This is very efficient because you are not moving data around 1032*16467b97STreehugger Robot all the time. As the buffer of tokens is converted to strings, the 1033*16467b97STreehugger Robot toString() method(s) check to see if there is an operation at the 1034*16467b97STreehugger Robot current index. If so, the operation is done and then normal String 1035*16467b97STreehugger Robot rendering continues on the buffer. This is like having multiple Turing 1036*16467b97STreehugger Robot machine instruction streams (programs) operating on a single input tape. :) 1037*16467b97STreehugger Robot 1038*16467b97STreehugger Robot Since the operations are done lazily at toString-time, operations do not 1039*16467b97STreehugger Robot screw up the token index values. That is, an insert operation at token 1040*16467b97STreehugger Robot index i does not change the index values for tokens i+1..n-1. 1041*16467b97STreehugger Robot 1042*16467b97STreehugger Robot Because operations never actually alter the buffer, you may always get 1043*16467b97STreehugger Robot the original token stream back without undoing anything. Since 1044*16467b97STreehugger Robot the instructions are queued up, you can easily simulate transactions and 1045*16467b97STreehugger Robot roll back any changes if there is an error just by removing instructions. 1046*16467b97STreehugger Robot For example, 1047*16467b97STreehugger Robot 1048*16467b97STreehugger Robot CharStream input = new ANTLRFileStream("input"); 1049*16467b97STreehugger Robot TLexer lex = new TLexer(input); 1050*16467b97STreehugger Robot TokenRewriteStream tokens = new TokenRewriteStream(lex); 1051*16467b97STreehugger Robot T parser = new T(tokens); 1052*16467b97STreehugger Robot parser.startRule(); 1053*16467b97STreehugger Robot 1054*16467b97STreehugger Robot Then in the rules, you can execute 1055*16467b97STreehugger Robot Token t,u; 1056*16467b97STreehugger Robot ... 1057*16467b97STreehugger Robot input.insertAfter(t, "text to put after t");} 1058*16467b97STreehugger Robot input.insertAfter(u, "text after u");} 1059*16467b97STreehugger Robot System.out.println(tokens.toString()); 1060*16467b97STreehugger Robot 1061*16467b97STreehugger Robot Actually, you have to cast the 'input' to a TokenRewriteStream. :( 1062*16467b97STreehugger Robot 1063*16467b97STreehugger Robot You can also have multiple "instruction streams" and get multiple 1064*16467b97STreehugger Robot rewrites from a single pass over the input. Just name the instruction 1065*16467b97STreehugger Robot streams and use that name again when printing the buffer. This could be 1066*16467b97STreehugger Robot useful for generating a C file and also its header file--all from the 1067*16467b97STreehugger Robot same buffer: 1068*16467b97STreehugger Robot 1069*16467b97STreehugger Robot tokens.insertAfter("pass1", t, "text to put after t");} 1070*16467b97STreehugger Robot tokens.insertAfter("pass2", u, "text after u");} 1071*16467b97STreehugger Robot System.out.println(tokens.toString("pass1")); 1072*16467b97STreehugger Robot System.out.println(tokens.toString("pass2")); 1073*16467b97STreehugger Robot 1074*16467b97STreehugger Robot If you don't use named rewrite streams, a "default" stream is used as 1075*16467b97STreehugger Robot the first example shows. 1076*16467b97STreehugger Robot """ 1077*16467b97STreehugger Robot 1078*16467b97STreehugger Robot DEFAULT_PROGRAM_NAME = "default" 1079*16467b97STreehugger Robot MIN_TOKEN_INDEX = 0 1080*16467b97STreehugger Robot 1081*16467b97STreehugger Robot def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): 1082*16467b97STreehugger Robot CommonTokenStream.__init__(self, tokenSource, channel) 1083*16467b97STreehugger Robot 1084*16467b97STreehugger Robot # You may have multiple, named streams of rewrite operations. 1085*16467b97STreehugger Robot # I'm calling these things "programs." 1086*16467b97STreehugger Robot # Maps String (name) -> rewrite (List) 1087*16467b97STreehugger Robot self.programs = {} 1088*16467b97STreehugger Robot self.programs[self.DEFAULT_PROGRAM_NAME] = [] 1089*16467b97STreehugger Robot 1090*16467b97STreehugger Robot # Map String (program name) -> Integer index 1091*16467b97STreehugger Robot self.lastRewriteTokenIndexes = {} 1092*16467b97STreehugger Robot 1093*16467b97STreehugger Robot 1094*16467b97STreehugger Robot def rollback(self, *args): 1095*16467b97STreehugger Robot """ 1096*16467b97STreehugger Robot Rollback the instruction stream for a program so that 1097*16467b97STreehugger Robot the indicated instruction (via instructionIndex) is no 1098*16467b97STreehugger Robot longer in the stream. UNTESTED! 1099*16467b97STreehugger Robot """ 1100*16467b97STreehugger Robot 1101*16467b97STreehugger Robot if len(args) == 2: 1102*16467b97STreehugger Robot programName = args[0] 1103*16467b97STreehugger Robot instructionIndex = args[1] 1104*16467b97STreehugger Robot elif len(args) == 1: 1105*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1106*16467b97STreehugger Robot instructionIndex = args[0] 1107*16467b97STreehugger Robot else: 1108*16467b97STreehugger Robot raise TypeError("Invalid arguments") 1109*16467b97STreehugger Robot 1110*16467b97STreehugger Robot p = self.programs.get(programName, None) 1111*16467b97STreehugger Robot if p is not None: 1112*16467b97STreehugger Robot self.programs[programName] = ( 1113*16467b97STreehugger Robot p[self.MIN_TOKEN_INDEX:instructionIndex]) 1114*16467b97STreehugger Robot 1115*16467b97STreehugger Robot 1116*16467b97STreehugger Robot def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME): 1117*16467b97STreehugger Robot """Reset the program so that no instructions exist""" 1118*16467b97STreehugger Robot 1119*16467b97STreehugger Robot self.rollback(programName, self.MIN_TOKEN_INDEX) 1120*16467b97STreehugger Robot 1121*16467b97STreehugger Robot 1122*16467b97STreehugger Robot def insertAfter(self, *args): 1123*16467b97STreehugger Robot if len(args) == 2: 1124*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1125*16467b97STreehugger Robot index = args[0] 1126*16467b97STreehugger Robot text = args[1] 1127*16467b97STreehugger Robot 1128*16467b97STreehugger Robot elif len(args) == 3: 1129*16467b97STreehugger Robot programName = args[0] 1130*16467b97STreehugger Robot index = args[1] 1131*16467b97STreehugger Robot text = args[2] 1132*16467b97STreehugger Robot 1133*16467b97STreehugger Robot else: 1134*16467b97STreehugger Robot raise TypeError("Invalid arguments") 1135*16467b97STreehugger Robot 1136*16467b97STreehugger Robot if isinstance(index, Token): 1137*16467b97STreehugger Robot # index is a Token, grap the stream index from it 1138*16467b97STreehugger Robot index = index.index 1139*16467b97STreehugger Robot 1140*16467b97STreehugger Robot # to insert after, just insert before next index (even if past end) 1141*16467b97STreehugger Robot self.insertBefore(programName, index+1, text) 1142*16467b97STreehugger Robot 1143*16467b97STreehugger Robot 1144*16467b97STreehugger Robot def insertBefore(self, *args): 1145*16467b97STreehugger Robot if len(args) == 2: 1146*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1147*16467b97STreehugger Robot index = args[0] 1148*16467b97STreehugger Robot text = args[1] 1149*16467b97STreehugger Robot 1150*16467b97STreehugger Robot elif len(args) == 3: 1151*16467b97STreehugger Robot programName = args[0] 1152*16467b97STreehugger Robot index = args[1] 1153*16467b97STreehugger Robot text = args[2] 1154*16467b97STreehugger Robot 1155*16467b97STreehugger Robot else: 1156*16467b97STreehugger Robot raise TypeError("Invalid arguments") 1157*16467b97STreehugger Robot 1158*16467b97STreehugger Robot if isinstance(index, Token): 1159*16467b97STreehugger Robot # index is a Token, grap the stream index from it 1160*16467b97STreehugger Robot index = index.index 1161*16467b97STreehugger Robot 1162*16467b97STreehugger Robot op = InsertBeforeOp(self, index, text) 1163*16467b97STreehugger Robot rewrites = self.getProgram(programName) 1164*16467b97STreehugger Robot op.instructionIndex = len(rewrites) 1165*16467b97STreehugger Robot rewrites.append(op) 1166*16467b97STreehugger Robot 1167*16467b97STreehugger Robot 1168*16467b97STreehugger Robot def replace(self, *args): 1169*16467b97STreehugger Robot if len(args) == 2: 1170*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1171*16467b97STreehugger Robot first = args[0] 1172*16467b97STreehugger Robot last = args[0] 1173*16467b97STreehugger Robot text = args[1] 1174*16467b97STreehugger Robot 1175*16467b97STreehugger Robot elif len(args) == 3: 1176*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1177*16467b97STreehugger Robot first = args[0] 1178*16467b97STreehugger Robot last = args[1] 1179*16467b97STreehugger Robot text = args[2] 1180*16467b97STreehugger Robot 1181*16467b97STreehugger Robot elif len(args) == 4: 1182*16467b97STreehugger Robot programName = args[0] 1183*16467b97STreehugger Robot first = args[1] 1184*16467b97STreehugger Robot last = args[2] 1185*16467b97STreehugger Robot text = args[3] 1186*16467b97STreehugger Robot 1187*16467b97STreehugger Robot else: 1188*16467b97STreehugger Robot raise TypeError("Invalid arguments") 1189*16467b97STreehugger Robot 1190*16467b97STreehugger Robot if isinstance(first, Token): 1191*16467b97STreehugger Robot # first is a Token, grap the stream index from it 1192*16467b97STreehugger Robot first = first.index 1193*16467b97STreehugger Robot 1194*16467b97STreehugger Robot if isinstance(last, Token): 1195*16467b97STreehugger Robot # last is a Token, grap the stream index from it 1196*16467b97STreehugger Robot last = last.index 1197*16467b97STreehugger Robot 1198*16467b97STreehugger Robot if first > last or first < 0 or last < 0 or last >= len(self.tokens): 1199*16467b97STreehugger Robot raise ValueError( 1200*16467b97STreehugger Robot "replace: range invalid: %d..%d (size=%d)" 1201*16467b97STreehugger Robot % (first, last, len(self.tokens))) 1202*16467b97STreehugger Robot 1203*16467b97STreehugger Robot op = ReplaceOp(self, first, last, text) 1204*16467b97STreehugger Robot rewrites = self.getProgram(programName) 1205*16467b97STreehugger Robot op.instructionIndex = len(rewrites) 1206*16467b97STreehugger Robot rewrites.append(op) 1207*16467b97STreehugger Robot 1208*16467b97STreehugger Robot 1209*16467b97STreehugger Robot def delete(self, *args): 1210*16467b97STreehugger Robot self.replace(*(list(args) + [None])) 1211*16467b97STreehugger Robot 1212*16467b97STreehugger Robot 1213*16467b97STreehugger Robot def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME): 1214*16467b97STreehugger Robot return self.lastRewriteTokenIndexes.get(programName, -1) 1215*16467b97STreehugger Robot 1216*16467b97STreehugger Robot 1217*16467b97STreehugger Robot def setLastRewriteTokenIndex(self, programName, i): 1218*16467b97STreehugger Robot self.lastRewriteTokenIndexes[programName] = i 1219*16467b97STreehugger Robot 1220*16467b97STreehugger Robot 1221*16467b97STreehugger Robot def getProgram(self, name): 1222*16467b97STreehugger Robot p = self.programs.get(name, None) 1223*16467b97STreehugger Robot if p is None: 1224*16467b97STreehugger Robot p = self.initializeProgram(name) 1225*16467b97STreehugger Robot 1226*16467b97STreehugger Robot return p 1227*16467b97STreehugger Robot 1228*16467b97STreehugger Robot 1229*16467b97STreehugger Robot def initializeProgram(self, name): 1230*16467b97STreehugger Robot p = [] 1231*16467b97STreehugger Robot self.programs[name] = p 1232*16467b97STreehugger Robot return p 1233*16467b97STreehugger Robot 1234*16467b97STreehugger Robot 1235*16467b97STreehugger Robot def toOriginalString(self, start=None, end=None): 1236*16467b97STreehugger Robot if self.p == -1: 1237*16467b97STreehugger Robot self.fillBuffer() 1238*16467b97STreehugger Robot 1239*16467b97STreehugger Robot if start is None: 1240*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1241*16467b97STreehugger Robot if end is None: 1242*16467b97STreehugger Robot end = self.size() - 1 1243*16467b97STreehugger Robot 1244*16467b97STreehugger Robot buf = StringIO() 1245*16467b97STreehugger Robot i = start 1246*16467b97STreehugger Robot while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): 1247*16467b97STreehugger Robot if self.get(i).type != EOF: 1248*16467b97STreehugger Robot buf.write(self.get(i).text) 1249*16467b97STreehugger Robot i += 1 1250*16467b97STreehugger Robot 1251*16467b97STreehugger Robot return buf.getvalue() 1252*16467b97STreehugger Robot 1253*16467b97STreehugger Robot 1254*16467b97STreehugger Robot def toString(self, *args): 1255*16467b97STreehugger Robot if self.p == -1: 1256*16467b97STreehugger Robot self.fillBuffer() 1257*16467b97STreehugger Robot 1258*16467b97STreehugger Robot if len(args) == 0: 1259*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1260*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1261*16467b97STreehugger Robot end = self.size() - 1 1262*16467b97STreehugger Robot 1263*16467b97STreehugger Robot elif len(args) == 1: 1264*16467b97STreehugger Robot programName = args[0] 1265*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1266*16467b97STreehugger Robot end = self.size() - 1 1267*16467b97STreehugger Robot 1268*16467b97STreehugger Robot elif len(args) == 2: 1269*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1270*16467b97STreehugger Robot start = args[0] 1271*16467b97STreehugger Robot end = args[1] 1272*16467b97STreehugger Robot 1273*16467b97STreehugger Robot if start is None: 1274*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1275*16467b97STreehugger Robot elif not isinstance(start, int): 1276*16467b97STreehugger Robot start = start.index 1277*16467b97STreehugger Robot 1278*16467b97STreehugger Robot if end is None: 1279*16467b97STreehugger Robot end = len(self.tokens) - 1 1280*16467b97STreehugger Robot elif not isinstance(end, int): 1281*16467b97STreehugger Robot end = end.index 1282*16467b97STreehugger Robot 1283*16467b97STreehugger Robot # ensure start/end are in range 1284*16467b97STreehugger Robot if end >= len(self.tokens): 1285*16467b97STreehugger Robot end = len(self.tokens) - 1 1286*16467b97STreehugger Robot 1287*16467b97STreehugger Robot if start < 0: 1288*16467b97STreehugger Robot start = 0 1289*16467b97STreehugger Robot 1290*16467b97STreehugger Robot rewrites = self.programs.get(programName) 1291*16467b97STreehugger Robot if rewrites is None or len(rewrites) == 0: 1292*16467b97STreehugger Robot # no instructions to execute 1293*16467b97STreehugger Robot return self.toOriginalString(start, end) 1294*16467b97STreehugger Robot 1295*16467b97STreehugger Robot buf = StringIO() 1296*16467b97STreehugger Robot 1297*16467b97STreehugger Robot # First, optimize instruction stream 1298*16467b97STreehugger Robot indexToOp = self.reduceToSingleOperationPerIndex(rewrites) 1299*16467b97STreehugger Robot 1300*16467b97STreehugger Robot # Walk buffer, executing instructions and emitting tokens 1301*16467b97STreehugger Robot i = start 1302*16467b97STreehugger Robot while i <= end and i < len(self.tokens): 1303*16467b97STreehugger Robot op = indexToOp.get(i) 1304*16467b97STreehugger Robot # remove so any left have index size-1 1305*16467b97STreehugger Robot try: 1306*16467b97STreehugger Robot del indexToOp[i] 1307*16467b97STreehugger Robot except KeyError: 1308*16467b97STreehugger Robot pass 1309*16467b97STreehugger Robot 1310*16467b97STreehugger Robot t = self.tokens[i] 1311*16467b97STreehugger Robot if op is None: 1312*16467b97STreehugger Robot # no operation at that index, just dump token 1313*16467b97STreehugger Robot if t.type != EOF: 1314*16467b97STreehugger Robot buf.write(t.text) 1315*16467b97STreehugger Robot i += 1 # move to next token 1316*16467b97STreehugger Robot 1317*16467b97STreehugger Robot else: 1318*16467b97STreehugger Robot i = op.execute(buf) # execute operation and skip 1319*16467b97STreehugger Robot 1320*16467b97STreehugger Robot # include stuff after end if it's last index in buffer 1321*16467b97STreehugger Robot # So, if they did an insertAfter(lastValidIndex, "foo"), include 1322*16467b97STreehugger Robot # foo if end==lastValidIndex. 1323*16467b97STreehugger Robot if end == len(self.tokens) - 1: 1324*16467b97STreehugger Robot # Scan any remaining operations after last token 1325*16467b97STreehugger Robot # should be included (they will be inserts). 1326*16467b97STreehugger Robot for i in sorted(indexToOp.keys()): 1327*16467b97STreehugger Robot op = indexToOp[i] 1328*16467b97STreehugger Robot if op.index >= len(self.tokens)-1: 1329*16467b97STreehugger Robot buf.write(op.text) 1330*16467b97STreehugger Robot 1331*16467b97STreehugger Robot return buf.getvalue() 1332*16467b97STreehugger Robot 1333*16467b97STreehugger Robot __str__ = toString 1334*16467b97STreehugger Robot 1335*16467b97STreehugger Robot 1336*16467b97STreehugger Robot def reduceToSingleOperationPerIndex(self, rewrites): 1337*16467b97STreehugger Robot """ 1338*16467b97STreehugger Robot We need to combine operations and report invalid operations (like 1339*16467b97STreehugger Robot overlapping replaces that are not completed nested). Inserts to 1340*16467b97STreehugger Robot same index need to be combined etc... Here are the cases: 1341*16467b97STreehugger Robot 1342*16467b97STreehugger Robot I.i.u I.j.v leave alone, nonoverlapping 1343*16467b97STreehugger Robot I.i.u I.i.v combine: Iivu 1344*16467b97STreehugger Robot 1345*16467b97STreehugger Robot R.i-j.u R.x-y.v | i-j in x-y delete first R 1346*16467b97STreehugger Robot R.i-j.u R.i-j.v delete first R 1347*16467b97STreehugger Robot R.i-j.u R.x-y.v | x-y in i-j ERROR 1348*16467b97STreehugger Robot R.i-j.u R.x-y.v | boundaries overlap ERROR 1349*16467b97STreehugger Robot 1350*16467b97STreehugger Robot Delete special case of replace (text==null): 1351*16467b97STreehugger Robot D.i-j.u D.x-y.v | boundaries overlapcombine to 1352*16467b97STreehugger Robot max(min)..max(right) 1353*16467b97STreehugger Robot 1354*16467b97STreehugger Robot I.i.u R.x-y.v | i in (x+1)-ydelete I (since 1355*16467b97STreehugger Robot insert before we're not deleting 1356*16467b97STreehugger Robot i) 1357*16467b97STreehugger Robot I.i.u R.x-y.v | i not in (x+1)-yleave alone, 1358*16467b97STreehugger Robot nonoverlapping 1359*16467b97STreehugger Robot 1360*16467b97STreehugger Robot R.x-y.v I.i.u | i in x-y ERROR 1361*16467b97STreehugger Robot R.x-y.v I.x.u R.x-y.uv (combine, delete I) 1362*16467b97STreehugger Robot R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 1363*16467b97STreehugger Robot 1364*16467b97STreehugger Robot I.i.u = insert u before op @ index i 1365*16467b97STreehugger Robot R.x-y.u = replace x-y indexed tokens with u 1366*16467b97STreehugger Robot 1367*16467b97STreehugger Robot First we need to examine replaces. For any replace op: 1368*16467b97STreehugger Robot 1369*16467b97STreehugger Robot 1. wipe out any insertions before op within that range. 1370*16467b97STreehugger Robot 2. Drop any replace op before that is contained completely within 1371*16467b97STreehugger Robot that range. 1372*16467b97STreehugger Robot 3. Throw exception upon boundary overlap with any previous replace. 1373*16467b97STreehugger Robot 1374*16467b97STreehugger Robot Then we can deal with inserts: 1375*16467b97STreehugger Robot 1376*16467b97STreehugger Robot 1. for any inserts to same index, combine even if not adjacent. 1377*16467b97STreehugger Robot 2. for any prior replace with same left boundary, combine this 1378*16467b97STreehugger Robot insert with replace and delete this replace. 1379*16467b97STreehugger Robot 3. throw exception if index in same range as previous replace 1380*16467b97STreehugger Robot 1381*16467b97STreehugger Robot Don't actually delete; make op null in list. Easier to walk list. 1382*16467b97STreehugger Robot Later we can throw as we add to index -> op map. 1383*16467b97STreehugger Robot 1384*16467b97STreehugger Robot Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 1385*16467b97STreehugger Robot inserted stuff would be before the replace range. But, if you 1386*16467b97STreehugger Robot add tokens in front of a method body '{' and then delete the method 1387*16467b97STreehugger Robot body, I think the stuff before the '{' you added should disappear too. 1388*16467b97STreehugger Robot 1389*16467b97STreehugger Robot Return a map from token index to operation. 1390*16467b97STreehugger Robot """ 1391*16467b97STreehugger Robot 1392*16467b97STreehugger Robot # WALK REPLACES 1393*16467b97STreehugger Robot for i, rop in enumerate(rewrites): 1394*16467b97STreehugger Robot if rop is None: 1395*16467b97STreehugger Robot continue 1396*16467b97STreehugger Robot 1397*16467b97STreehugger Robot if not isinstance(rop, ReplaceOp): 1398*16467b97STreehugger Robot continue 1399*16467b97STreehugger Robot 1400*16467b97STreehugger Robot # Wipe prior inserts within range 1401*16467b97STreehugger Robot for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i): 1402*16467b97STreehugger Robot if iop.index == rop.index: 1403*16467b97STreehugger Robot # E.g., insert before 2, delete 2..2; update replace 1404*16467b97STreehugger Robot # text to include insert before, kill insert 1405*16467b97STreehugger Robot rewrites[iop.instructionIndex] = None 1406*16467b97STreehugger Robot rop.text = self.catOpText(iop.text, rop.text) 1407*16467b97STreehugger Robot 1408*16467b97STreehugger Robot elif iop.index > rop.index and iop.index <= rop.lastIndex: 1409*16467b97STreehugger Robot # delete insert as it's a no-op. 1410*16467b97STreehugger Robot rewrites[j] = None 1411*16467b97STreehugger Robot 1412*16467b97STreehugger Robot # Drop any prior replaces contained within 1413*16467b97STreehugger Robot for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i): 1414*16467b97STreehugger Robot if (prevRop.index >= rop.index 1415*16467b97STreehugger Robot and prevRop.lastIndex <= rop.lastIndex): 1416*16467b97STreehugger Robot # delete replace as it's a no-op. 1417*16467b97STreehugger Robot rewrites[j] = None 1418*16467b97STreehugger Robot continue 1419*16467b97STreehugger Robot 1420*16467b97STreehugger Robot # throw exception unless disjoint or identical 1421*16467b97STreehugger Robot disjoint = (prevRop.lastIndex < rop.index 1422*16467b97STreehugger Robot or prevRop.index > rop.lastIndex) 1423*16467b97STreehugger Robot same = (prevRop.index == rop.index 1424*16467b97STreehugger Robot and prevRop.lastIndex == rop.lastIndex) 1425*16467b97STreehugger Robot 1426*16467b97STreehugger Robot # Delete special case of replace (text==null): 1427*16467b97STreehugger Robot # D.i-j.u D.x-y.v| boundaries overlapcombine to 1428*16467b97STreehugger Robot # max(min)..max(right) 1429*16467b97STreehugger Robot if prevRop.text is None and rop.text is None and not disjoint: 1430*16467b97STreehugger Robot # kill first delete 1431*16467b97STreehugger Robot rewrites[prevRop.instructionIndex] = None 1432*16467b97STreehugger Robot 1433*16467b97STreehugger Robot rop.index = min(prevRop.index, rop.index) 1434*16467b97STreehugger Robot rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex) 1435*16467b97STreehugger Robot 1436*16467b97STreehugger Robot elif not disjoint and not same: 1437*16467b97STreehugger Robot raise ValueError( 1438*16467b97STreehugger Robot "replace op boundaries of %s overlap with previous %s" 1439*16467b97STreehugger Robot % (rop, prevRop)) 1440*16467b97STreehugger Robot 1441*16467b97STreehugger Robot # WALK INSERTS 1442*16467b97STreehugger Robot for i, iop in enumerate(rewrites): 1443*16467b97STreehugger Robot if iop is None: 1444*16467b97STreehugger Robot continue 1445*16467b97STreehugger Robot 1446*16467b97STreehugger Robot if not isinstance(iop, InsertBeforeOp): 1447*16467b97STreehugger Robot continue 1448*16467b97STreehugger Robot 1449*16467b97STreehugger Robot # combine current insert with prior if any at same index 1450*16467b97STreehugger Robot for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i): 1451*16467b97STreehugger Robot if prevIop.index == iop.index: # combine objects 1452*16467b97STreehugger Robot # convert to strings...we're in process of toString'ing 1453*16467b97STreehugger Robot # whole token buffer so no lazy eval issue with any 1454*16467b97STreehugger Robot # templates 1455*16467b97STreehugger Robot iop.text = self.catOpText(iop.text, prevIop.text) 1456*16467b97STreehugger Robot # delete redundant prior insert 1457*16467b97STreehugger Robot rewrites[j] = None 1458*16467b97STreehugger Robot 1459*16467b97STreehugger Robot # look for replaces where iop.index is in range; error 1460*16467b97STreehugger Robot for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i): 1461*16467b97STreehugger Robot if iop.index == rop.index: 1462*16467b97STreehugger Robot rop.text = self.catOpText(iop.text, rop.text) 1463*16467b97STreehugger Robot # delete current insert 1464*16467b97STreehugger Robot rewrites[i] = None 1465*16467b97STreehugger Robot continue 1466*16467b97STreehugger Robot 1467*16467b97STreehugger Robot if iop.index >= rop.index and iop.index <= rop.lastIndex: 1468*16467b97STreehugger Robot raise ValueError( 1469*16467b97STreehugger Robot "insert op %s within boundaries of previous %s" 1470*16467b97STreehugger Robot % (iop, rop)) 1471*16467b97STreehugger Robot 1472*16467b97STreehugger Robot m = {} 1473*16467b97STreehugger Robot for i, op in enumerate(rewrites): 1474*16467b97STreehugger Robot if op is None: 1475*16467b97STreehugger Robot # ignore deleted ops 1476*16467b97STreehugger Robot continue 1477*16467b97STreehugger Robot 1478*16467b97STreehugger Robot assert op.index not in m, "should only be one op per index" 1479*16467b97STreehugger Robot m[op.index] = op 1480*16467b97STreehugger Robot 1481*16467b97STreehugger Robot return m 1482*16467b97STreehugger Robot 1483*16467b97STreehugger Robot 1484*16467b97STreehugger Robot def catOpText(self, a, b): 1485*16467b97STreehugger Robot x = "" 1486*16467b97STreehugger Robot y = "" 1487*16467b97STreehugger Robot if a is not None: 1488*16467b97STreehugger Robot x = a 1489*16467b97STreehugger Robot if b is not None: 1490*16467b97STreehugger Robot y = b 1491*16467b97STreehugger Robot return x + y 1492*16467b97STreehugger Robot 1493*16467b97STreehugger Robot 1494*16467b97STreehugger Robot def getKindOfOps(self, rewrites, kind, before=None): 1495*16467b97STreehugger Robot """Get all operations before an index of a particular kind.""" 1496*16467b97STreehugger Robot 1497*16467b97STreehugger Robot if before is None: 1498*16467b97STreehugger Robot before = len(rewrites) 1499*16467b97STreehugger Robot elif before > len(rewrites): 1500*16467b97STreehugger Robot before = len(rewrites) 1501*16467b97STreehugger Robot 1502*16467b97STreehugger Robot for i, op in enumerate(rewrites[:before]): 1503*16467b97STreehugger Robot if op is None: 1504*16467b97STreehugger Robot # ignore deleted 1505*16467b97STreehugger Robot continue 1506*16467b97STreehugger Robot if op.__class__ == kind: 1507*16467b97STreehugger Robot yield i, op 1508*16467b97STreehugger Robot 1509*16467b97STreehugger Robot 1510*16467b97STreehugger Robot def toDebugString(self, start=None, end=None): 1511*16467b97STreehugger Robot if start is None: 1512*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1513*16467b97STreehugger Robot if end is None: 1514*16467b97STreehugger Robot end = self.size() - 1 1515*16467b97STreehugger Robot 1516*16467b97STreehugger Robot buf = StringIO() 1517*16467b97STreehugger Robot i = start 1518*16467b97STreehugger Robot while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): 1519*16467b97STreehugger Robot buf.write(self.get(i)) 1520*16467b97STreehugger Robot i += 1 1521*16467b97STreehugger Robot 1522*16467b97STreehugger Robot return buf.getvalue() 1523