xref: /aosp_15_r20/external/antlr/runtime/Python/antlr3/streams.py (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot"""ANTLR3 runtime package"""
2*16467b97STreehugger Robot
3*16467b97STreehugger Robot# begin[licence]
4*16467b97STreehugger Robot#
5*16467b97STreehugger Robot# [The "BSD licence"]
6*16467b97STreehugger Robot# Copyright (c) 2005-2008 Terence Parr
7*16467b97STreehugger Robot# All rights reserved.
8*16467b97STreehugger Robot#
9*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without
10*16467b97STreehugger Robot# modification, are permitted provided that the following conditions
11*16467b97STreehugger Robot# are met:
12*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright
13*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer.
14*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright
15*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer in the
16*16467b97STreehugger Robot#    documentation and/or other materials provided with the distribution.
17*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products
18*16467b97STreehugger Robot#    derived from this software without specific prior written permission.
19*16467b97STreehugger Robot#
20*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*16467b97STreehugger Robot#
31*16467b97STreehugger Robot# end[licence]
32*16467b97STreehugger Robot
33*16467b97STreehugger Robotimport codecs
34*16467b97STreehugger Robotfrom StringIO import StringIO
35*16467b97STreehugger Robot
36*16467b97STreehugger Robotfrom antlr3.constants import DEFAULT_CHANNEL, EOF
37*16467b97STreehugger Robotfrom antlr3.tokens import Token, CommonToken
38*16467b97STreehugger Robot
39*16467b97STreehugger Robot
40*16467b97STreehugger Robot############################################################################
41*16467b97STreehugger Robot#
42*16467b97STreehugger Robot# basic interfaces
43*16467b97STreehugger Robot#   IntStream
44*16467b97STreehugger Robot#    +- CharStream
45*16467b97STreehugger Robot#    \- TokenStream
46*16467b97STreehugger Robot#
47*16467b97STreehugger Robot# subclasses must implemented all methods
48*16467b97STreehugger Robot#
49*16467b97STreehugger Robot############################################################################
50*16467b97STreehugger Robot
51*16467b97STreehugger Robotclass IntStream(object):
52*16467b97STreehugger Robot    """
53*16467b97STreehugger Robot    @brief Base interface for streams of integer values.
54*16467b97STreehugger Robot
55*16467b97STreehugger Robot    A simple stream of integers used when all I care about is the char
56*16467b97STreehugger Robot    or token type sequence (such as interpretation).
57*16467b97STreehugger Robot    """
58*16467b97STreehugger Robot
59*16467b97STreehugger Robot    def consume(self):
60*16467b97STreehugger Robot        raise NotImplementedError
61*16467b97STreehugger Robot
62*16467b97STreehugger Robot
63*16467b97STreehugger Robot    def LA(self, i):
64*16467b97STreehugger Robot        """Get int at current input pointer + i ahead where i=1 is next int.
65*16467b97STreehugger Robot
66*16467b97STreehugger Robot        Negative indexes are allowed.  LA(-1) is previous token (token
67*16467b97STreehugger Robot	just matched).  LA(-i) where i is before first token should
68*16467b97STreehugger Robot	yield -1, invalid char / EOF.
69*16467b97STreehugger Robot	"""
70*16467b97STreehugger Robot
71*16467b97STreehugger Robot        raise NotImplementedError
72*16467b97STreehugger Robot
73*16467b97STreehugger Robot
74*16467b97STreehugger Robot    def mark(self):
75*16467b97STreehugger Robot        """
76*16467b97STreehugger Robot        Tell the stream to start buffering if it hasn't already.  Return
77*16467b97STreehugger Robot        current input position, index(), or some other marker so that
78*16467b97STreehugger Robot        when passed to rewind() you get back to the same spot.
79*16467b97STreehugger Robot        rewind(mark()) should not affect the input cursor.  The Lexer
80*16467b97STreehugger Robot        track line/col info as well as input index so its markers are
81*16467b97STreehugger Robot        not pure input indexes.  Same for tree node streams.
82*16467b97STreehugger Robot        """
83*16467b97STreehugger Robot
84*16467b97STreehugger Robot        raise NotImplementedError
85*16467b97STreehugger Robot
86*16467b97STreehugger Robot
87*16467b97STreehugger Robot    def index(self):
88*16467b97STreehugger Robot        """
89*16467b97STreehugger Robot        Return the current input symbol index 0..n where n indicates the
90*16467b97STreehugger Robot        last symbol has been read.  The index is the symbol about to be
91*16467b97STreehugger Robot        read not the most recently read symbol.
92*16467b97STreehugger Robot        """
93*16467b97STreehugger Robot
94*16467b97STreehugger Robot        raise NotImplementedError
95*16467b97STreehugger Robot
96*16467b97STreehugger Robot
97*16467b97STreehugger Robot    def rewind(self, marker=None):
98*16467b97STreehugger Robot        """
99*16467b97STreehugger Robot        Reset the stream so that next call to index would return marker.
100*16467b97STreehugger Robot        The marker will usually be index() but it doesn't have to be.  It's
101*16467b97STreehugger Robot        just a marker to indicate what state the stream was in.  This is
102*16467b97STreehugger Robot        essentially calling release() and seek().  If there are markers
103*16467b97STreehugger Robot        created after this marker argument, this routine must unroll them
104*16467b97STreehugger Robot        like a stack.  Assume the state the stream was in when this marker
105*16467b97STreehugger Robot        was created.
106*16467b97STreehugger Robot
107*16467b97STreehugger Robot        If marker is None:
108*16467b97STreehugger Robot        Rewind to the input position of the last marker.
109*16467b97STreehugger Robot        Used currently only after a cyclic DFA and just
110*16467b97STreehugger Robot        before starting a sem/syn predicate to get the
111*16467b97STreehugger Robot        input position back to the start of the decision.
112*16467b97STreehugger Robot        Do not "pop" the marker off the state.  mark(i)
113*16467b97STreehugger Robot        and rewind(i) should balance still. It is
114*16467b97STreehugger Robot        like invoking rewind(last marker) but it should not "pop"
115*16467b97STreehugger Robot        the marker off.  It's like seek(last marker's input position).
116*16467b97STreehugger Robot	"""
117*16467b97STreehugger Robot
118*16467b97STreehugger Robot        raise NotImplementedError
119*16467b97STreehugger Robot
120*16467b97STreehugger Robot
121*16467b97STreehugger Robot    def release(self, marker=None):
122*16467b97STreehugger Robot        """
123*16467b97STreehugger Robot        You may want to commit to a backtrack but don't want to force the
124*16467b97STreehugger Robot        stream to keep bookkeeping objects around for a marker that is
125*16467b97STreehugger Robot        no longer necessary.  This will have the same behavior as
126*16467b97STreehugger Robot        rewind() except it releases resources without the backward seek.
127*16467b97STreehugger Robot        This must throw away resources for all markers back to the marker
128*16467b97STreehugger Robot        argument.  So if you're nested 5 levels of mark(), and then release(2)
129*16467b97STreehugger Robot        you have to release resources for depths 2..5.
130*16467b97STreehugger Robot	"""
131*16467b97STreehugger Robot
132*16467b97STreehugger Robot        raise NotImplementedError
133*16467b97STreehugger Robot
134*16467b97STreehugger Robot
135*16467b97STreehugger Robot    def seek(self, index):
136*16467b97STreehugger Robot        """
137*16467b97STreehugger Robot        Set the input cursor to the position indicated by index.  This is
138*16467b97STreehugger Robot        normally used to seek ahead in the input stream.  No buffering is
139*16467b97STreehugger Robot        required to do this unless you know your stream will use seek to
140*16467b97STreehugger Robot        move backwards such as when backtracking.
141*16467b97STreehugger Robot
142*16467b97STreehugger Robot        This is different from rewind in its multi-directional
143*16467b97STreehugger Robot        requirement and in that its argument is strictly an input cursor
144*16467b97STreehugger Robot        (index).
145*16467b97STreehugger Robot
146*16467b97STreehugger Robot        For char streams, seeking forward must update the stream state such
147*16467b97STreehugger Robot        as line number.  For seeking backwards, you will be presumably
148*16467b97STreehugger Robot        backtracking using the mark/rewind mechanism that restores state and
149*16467b97STreehugger Robot        so this method does not need to update state when seeking backwards.
150*16467b97STreehugger Robot
151*16467b97STreehugger Robot        Currently, this method is only used for efficient backtracking using
152*16467b97STreehugger Robot        memoization, but in the future it may be used for incremental parsing.
153*16467b97STreehugger Robot
154*16467b97STreehugger Robot        The index is 0..n-1.  A seek to position i means that LA(1) will
155*16467b97STreehugger Robot        return the ith symbol.  So, seeking to 0 means LA(1) will return the
156*16467b97STreehugger Robot        first element in the stream.
157*16467b97STreehugger Robot        """
158*16467b97STreehugger Robot
159*16467b97STreehugger Robot        raise NotImplementedError
160*16467b97STreehugger Robot
161*16467b97STreehugger Robot
162*16467b97STreehugger Robot    def size(self):
163*16467b97STreehugger Robot        """
164*16467b97STreehugger Robot        Only makes sense for streams that buffer everything up probably, but
165*16467b97STreehugger Robot        might be useful to display the entire stream or for testing.  This
166*16467b97STreehugger Robot        value includes a single EOF.
167*16467b97STreehugger Robot	"""
168*16467b97STreehugger Robot
169*16467b97STreehugger Robot        raise NotImplementedError
170*16467b97STreehugger Robot
171*16467b97STreehugger Robot
172*16467b97STreehugger Robot    def getSourceName(self):
173*16467b97STreehugger Robot        """
174*16467b97STreehugger Robot        Where are you getting symbols from?  Normally, implementations will
175*16467b97STreehugger Robot        pass the buck all the way to the lexer who can ask its input stream
176*16467b97STreehugger Robot        for the file name or whatever.
177*16467b97STreehugger Robot        """
178*16467b97STreehugger Robot
179*16467b97STreehugger Robot        raise NotImplementedError
180*16467b97STreehugger Robot
181*16467b97STreehugger Robot
182*16467b97STreehugger Robotclass CharStream(IntStream):
183*16467b97STreehugger Robot    """
184*16467b97STreehugger Robot    @brief A source of characters for an ANTLR lexer.
185*16467b97STreehugger Robot
186*16467b97STreehugger Robot    This is an abstract class that must be implemented by a subclass.
187*16467b97STreehugger Robot
188*16467b97STreehugger Robot    """
189*16467b97STreehugger Robot
190*16467b97STreehugger Robot    # pylint does not realize that this is an interface, too
191*16467b97STreehugger Robot    #pylint: disable-msg=W0223
192*16467b97STreehugger Robot
193*16467b97STreehugger Robot    EOF = -1
194*16467b97STreehugger Robot
195*16467b97STreehugger Robot
196*16467b97STreehugger Robot    def substring(self, start, stop):
197*16467b97STreehugger Robot        """
198*16467b97STreehugger Robot        For infinite streams, you don't need this; primarily I'm providing
199*16467b97STreehugger Robot        a useful interface for action code.  Just make sure actions don't
200*16467b97STreehugger Robot        use this on streams that don't support it.
201*16467b97STreehugger Robot        """
202*16467b97STreehugger Robot
203*16467b97STreehugger Robot        raise NotImplementedError
204*16467b97STreehugger Robot
205*16467b97STreehugger Robot
206*16467b97STreehugger Robot    def LT(self, i):
207*16467b97STreehugger Robot        """
208*16467b97STreehugger Robot        Get the ith character of lookahead.  This is the same usually as
209*16467b97STreehugger Robot        LA(i).  This will be used for labels in the generated
210*16467b97STreehugger Robot        lexer code.  I'd prefer to return a char here type-wise, but it's
211*16467b97STreehugger Robot        probably better to be 32-bit clean and be consistent with LA.
212*16467b97STreehugger Robot        """
213*16467b97STreehugger Robot
214*16467b97STreehugger Robot        raise NotImplementedError
215*16467b97STreehugger Robot
216*16467b97STreehugger Robot
217*16467b97STreehugger Robot    def getLine(self):
218*16467b97STreehugger Robot        """ANTLR tracks the line information automatically"""
219*16467b97STreehugger Robot
220*16467b97STreehugger Robot        raise NotImplementedError
221*16467b97STreehugger Robot
222*16467b97STreehugger Robot
223*16467b97STreehugger Robot    def setLine(self, line):
224*16467b97STreehugger Robot        """
225*16467b97STreehugger Robot        Because this stream can rewind, we need to be able to reset the line
226*16467b97STreehugger Robot        """
227*16467b97STreehugger Robot
228*16467b97STreehugger Robot        raise NotImplementedError
229*16467b97STreehugger Robot
230*16467b97STreehugger Robot
231*16467b97STreehugger Robot    def getCharPositionInLine(self):
232*16467b97STreehugger Robot        """
233*16467b97STreehugger Robot        The index of the character relative to the beginning of the line 0..n-1
234*16467b97STreehugger Robot        """
235*16467b97STreehugger Robot
236*16467b97STreehugger Robot        raise NotImplementedError
237*16467b97STreehugger Robot
238*16467b97STreehugger Robot
239*16467b97STreehugger Robot    def setCharPositionInLine(self, pos):
240*16467b97STreehugger Robot        raise NotImplementedError
241*16467b97STreehugger Robot
242*16467b97STreehugger Robot
243*16467b97STreehugger Robotclass TokenStream(IntStream):
244*16467b97STreehugger Robot    """
245*16467b97STreehugger Robot
246*16467b97STreehugger Robot    @brief A stream of tokens accessing tokens from a TokenSource
247*16467b97STreehugger Robot
248*16467b97STreehugger Robot    This is an abstract class that must be implemented by a subclass.
249*16467b97STreehugger Robot
250*16467b97STreehugger Robot    """
251*16467b97STreehugger Robot
252*16467b97STreehugger Robot    # pylint does not realize that this is an interface, too
253*16467b97STreehugger Robot    #pylint: disable-msg=W0223
254*16467b97STreehugger Robot
255*16467b97STreehugger Robot    def LT(self, k):
256*16467b97STreehugger Robot        """
257*16467b97STreehugger Robot        Get Token at current input pointer + i ahead where i=1 is next Token.
258*16467b97STreehugger Robot        i<0 indicates tokens in the past.  So -1 is previous token and -2 is
259*16467b97STreehugger Robot        two tokens ago. LT(0) is undefined.  For i>=n, return Token.EOFToken.
260*16467b97STreehugger Robot        Return null for LT(0) and any index that results in an absolute address
261*16467b97STreehugger Robot        that is negative.
262*16467b97STreehugger Robot	"""
263*16467b97STreehugger Robot
264*16467b97STreehugger Robot        raise NotImplementedError
265*16467b97STreehugger Robot
266*16467b97STreehugger Robot
267*16467b97STreehugger Robot    def range(self):
268*16467b97STreehugger Robot        """
269*16467b97STreehugger Robot        How far ahead has the stream been asked to look?  The return
270*16467b97STreehugger Robot        value is a valid index from 0..n-1.
271*16467b97STreehugger Robot        """
272*16467b97STreehugger Robot
273*16467b97STreehugger Robot        raise NotImplementedError
274*16467b97STreehugger Robot
275*16467b97STreehugger Robot
276*16467b97STreehugger Robot    def get(self, i):
277*16467b97STreehugger Robot        """
278*16467b97STreehugger Robot        Get a token at an absolute index i; 0..n-1.  This is really only
279*16467b97STreehugger Robot        needed for profiling and debugging and token stream rewriting.
280*16467b97STreehugger Robot        If you don't want to buffer up tokens, then this method makes no
281*16467b97STreehugger Robot        sense for you.  Naturally you can't use the rewrite stream feature.
282*16467b97STreehugger Robot        I believe DebugTokenStream can easily be altered to not use
283*16467b97STreehugger Robot        this method, removing the dependency.
284*16467b97STreehugger Robot        """
285*16467b97STreehugger Robot
286*16467b97STreehugger Robot        raise NotImplementedError
287*16467b97STreehugger Robot
288*16467b97STreehugger Robot
289*16467b97STreehugger Robot    def getTokenSource(self):
290*16467b97STreehugger Robot        """
291*16467b97STreehugger Robot        Where is this stream pulling tokens from?  This is not the name, but
292*16467b97STreehugger Robot        the object that provides Token objects.
293*16467b97STreehugger Robot	"""
294*16467b97STreehugger Robot
295*16467b97STreehugger Robot        raise NotImplementedError
296*16467b97STreehugger Robot
297*16467b97STreehugger Robot
298*16467b97STreehugger Robot    def toString(self, start=None, stop=None):
299*16467b97STreehugger Robot        """
300*16467b97STreehugger Robot        Return the text of all tokens from start to stop, inclusive.
301*16467b97STreehugger Robot        If the stream does not buffer all the tokens then it can just
302*16467b97STreehugger Robot        return "" or null;  Users should not access $ruleLabel.text in
303*16467b97STreehugger Robot        an action of course in that case.
304*16467b97STreehugger Robot
305*16467b97STreehugger Robot        Because the user is not required to use a token with an index stored
306*16467b97STreehugger Robot        in it, we must provide a means for two token objects themselves to
307*16467b97STreehugger Robot        indicate the start/end location.  Most often this will just delegate
308*16467b97STreehugger Robot        to the other toString(int,int).  This is also parallel with
309*16467b97STreehugger Robot        the TreeNodeStream.toString(Object,Object).
310*16467b97STreehugger Robot	"""
311*16467b97STreehugger Robot
312*16467b97STreehugger Robot        raise NotImplementedError
313*16467b97STreehugger Robot
314*16467b97STreehugger Robot
315*16467b97STreehugger Robot############################################################################
316*16467b97STreehugger Robot#
317*16467b97STreehugger Robot# character streams for use in lexers
318*16467b97STreehugger Robot#   CharStream
319*16467b97STreehugger Robot#   \- ANTLRStringStream
320*16467b97STreehugger Robot#
321*16467b97STreehugger Robot############################################################################
322*16467b97STreehugger Robot
323*16467b97STreehugger Robot
324*16467b97STreehugger Robotclass ANTLRStringStream(CharStream):
325*16467b97STreehugger Robot    """
326*16467b97STreehugger Robot    @brief CharStream that pull data from a unicode string.
327*16467b97STreehugger Robot
328*16467b97STreehugger Robot    A pretty quick CharStream that pulls all data from an array
329*16467b97STreehugger Robot    directly.  Every method call counts in the lexer.
330*16467b97STreehugger Robot
331*16467b97STreehugger Robot    """
332*16467b97STreehugger Robot
333*16467b97STreehugger Robot
334*16467b97STreehugger Robot    def __init__(self, data):
335*16467b97STreehugger Robot        """
336*16467b97STreehugger Robot        @param data This should be a unicode string holding the data you want
337*16467b97STreehugger Robot           to parse. If you pass in a byte string, the Lexer will choke on
338*16467b97STreehugger Robot           non-ascii data.
339*16467b97STreehugger Robot
340*16467b97STreehugger Robot        """
341*16467b97STreehugger Robot
342*16467b97STreehugger Robot        CharStream.__init__(self)
343*16467b97STreehugger Robot
344*16467b97STreehugger Robot  	# The data being scanned
345*16467b97STreehugger Robot        self.strdata = unicode(data)
346*16467b97STreehugger Robot        self.data = [ord(c) for c in self.strdata]
347*16467b97STreehugger Robot
348*16467b97STreehugger Robot	# How many characters are actually in the buffer
349*16467b97STreehugger Robot        self.n = len(data)
350*16467b97STreehugger Robot
351*16467b97STreehugger Robot 	# 0..n-1 index into string of next char
352*16467b97STreehugger Robot        self.p = 0
353*16467b97STreehugger Robot
354*16467b97STreehugger Robot	# line number 1..n within the input
355*16467b97STreehugger Robot        self.line = 1
356*16467b97STreehugger Robot
357*16467b97STreehugger Robot 	# The index of the character relative to the beginning of the
358*16467b97STreehugger Robot        # line 0..n-1
359*16467b97STreehugger Robot        self.charPositionInLine = 0
360*16467b97STreehugger Robot
361*16467b97STreehugger Robot	# A list of CharStreamState objects that tracks the stream state
362*16467b97STreehugger Robot        # values line, charPositionInLine, and p that can change as you
363*16467b97STreehugger Robot        # move through the input stream.  Indexed from 0..markDepth-1.
364*16467b97STreehugger Robot        self._markers = [ ]
365*16467b97STreehugger Robot        self.lastMarker = None
366*16467b97STreehugger Robot        self.markDepth = 0
367*16467b97STreehugger Robot
368*16467b97STreehugger Robot        # What is name or source of this char stream?
369*16467b97STreehugger Robot        self.name = None
370*16467b97STreehugger Robot
371*16467b97STreehugger Robot
372*16467b97STreehugger Robot    def reset(self):
373*16467b97STreehugger Robot        """
374*16467b97STreehugger Robot        Reset the stream so that it's in the same state it was
375*16467b97STreehugger Robot        when the object was created *except* the data array is not
376*16467b97STreehugger Robot        touched.
377*16467b97STreehugger Robot        """
378*16467b97STreehugger Robot
379*16467b97STreehugger Robot        self.p = 0
380*16467b97STreehugger Robot        self.line = 1
381*16467b97STreehugger Robot        self.charPositionInLine = 0
382*16467b97STreehugger Robot        self._markers = [ ]
383*16467b97STreehugger Robot
384*16467b97STreehugger Robot
385*16467b97STreehugger Robot    def consume(self):
386*16467b97STreehugger Robot        try:
387*16467b97STreehugger Robot            if self.data[self.p] == 10: # \n
388*16467b97STreehugger Robot                self.line += 1
389*16467b97STreehugger Robot                self.charPositionInLine = 0
390*16467b97STreehugger Robot            else:
391*16467b97STreehugger Robot                self.charPositionInLine += 1
392*16467b97STreehugger Robot
393*16467b97STreehugger Robot            self.p += 1
394*16467b97STreehugger Robot
395*16467b97STreehugger Robot        except IndexError:
396*16467b97STreehugger Robot            # happend when we reached EOF and self.data[self.p] fails
397*16467b97STreehugger Robot            # just do nothing
398*16467b97STreehugger Robot            pass
399*16467b97STreehugger Robot
400*16467b97STreehugger Robot
401*16467b97STreehugger Robot
402*16467b97STreehugger Robot    def LA(self, i):
403*16467b97STreehugger Robot        if i == 0:
404*16467b97STreehugger Robot            return 0 # undefined
405*16467b97STreehugger Robot
406*16467b97STreehugger Robot        if i < 0:
407*16467b97STreehugger Robot            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
408*16467b97STreehugger Robot
409*16467b97STreehugger Robot        try:
410*16467b97STreehugger Robot            return self.data[self.p+i-1]
411*16467b97STreehugger Robot        except IndexError:
412*16467b97STreehugger Robot            return EOF
413*16467b97STreehugger Robot
414*16467b97STreehugger Robot
415*16467b97STreehugger Robot
416*16467b97STreehugger Robot    def LT(self, i):
417*16467b97STreehugger Robot        if i == 0:
418*16467b97STreehugger Robot            return 0 # undefined
419*16467b97STreehugger Robot
420*16467b97STreehugger Robot        if i < 0:
421*16467b97STreehugger Robot            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
422*16467b97STreehugger Robot
423*16467b97STreehugger Robot        try:
424*16467b97STreehugger Robot            return self.strdata[self.p+i-1]
425*16467b97STreehugger Robot        except IndexError:
426*16467b97STreehugger Robot            return EOF
427*16467b97STreehugger Robot
428*16467b97STreehugger Robot
429*16467b97STreehugger Robot    def index(self):
430*16467b97STreehugger Robot        """
431*16467b97STreehugger Robot        Return the current input symbol index 0..n where n indicates the
432*16467b97STreehugger Robot        last symbol has been read.  The index is the index of char to
433*16467b97STreehugger Robot        be returned from LA(1).
434*16467b97STreehugger Robot        """
435*16467b97STreehugger Robot
436*16467b97STreehugger Robot        return self.p
437*16467b97STreehugger Robot
438*16467b97STreehugger Robot
439*16467b97STreehugger Robot    def size(self):
440*16467b97STreehugger Robot        return self.n
441*16467b97STreehugger Robot
442*16467b97STreehugger Robot
443*16467b97STreehugger Robot    def mark(self):
444*16467b97STreehugger Robot        state = (self.p, self.line, self.charPositionInLine)
445*16467b97STreehugger Robot        try:
446*16467b97STreehugger Robot            self._markers[self.markDepth] = state
447*16467b97STreehugger Robot        except IndexError:
448*16467b97STreehugger Robot            self._markers.append(state)
449*16467b97STreehugger Robot        self.markDepth += 1
450*16467b97STreehugger Robot
451*16467b97STreehugger Robot        self.lastMarker = self.markDepth
452*16467b97STreehugger Robot
453*16467b97STreehugger Robot        return self.lastMarker
454*16467b97STreehugger Robot
455*16467b97STreehugger Robot
456*16467b97STreehugger Robot    def rewind(self, marker=None):
457*16467b97STreehugger Robot        if marker is None:
458*16467b97STreehugger Robot            marker = self.lastMarker
459*16467b97STreehugger Robot
460*16467b97STreehugger Robot        p, line, charPositionInLine = self._markers[marker-1]
461*16467b97STreehugger Robot
462*16467b97STreehugger Robot        self.seek(p)
463*16467b97STreehugger Robot        self.line = line
464*16467b97STreehugger Robot        self.charPositionInLine = charPositionInLine
465*16467b97STreehugger Robot        self.release(marker)
466*16467b97STreehugger Robot
467*16467b97STreehugger Robot
468*16467b97STreehugger Robot    def release(self, marker=None):
469*16467b97STreehugger Robot        if marker is None:
470*16467b97STreehugger Robot            marker = self.lastMarker
471*16467b97STreehugger Robot
472*16467b97STreehugger Robot        self.markDepth = marker-1
473*16467b97STreehugger Robot
474*16467b97STreehugger Robot
475*16467b97STreehugger Robot    def seek(self, index):
476*16467b97STreehugger Robot        """
477*16467b97STreehugger Robot        consume() ahead until p==index; can't just set p=index as we must
478*16467b97STreehugger Robot        update line and charPositionInLine.
479*16467b97STreehugger Robot        """
480*16467b97STreehugger Robot
481*16467b97STreehugger Robot        if index <= self.p:
482*16467b97STreehugger Robot            self.p = index # just jump; don't update stream state (line, ...)
483*16467b97STreehugger Robot            return
484*16467b97STreehugger Robot
485*16467b97STreehugger Robot        # seek forward, consume until p hits index
486*16467b97STreehugger Robot        while self.p < index:
487*16467b97STreehugger Robot            self.consume()
488*16467b97STreehugger Robot
489*16467b97STreehugger Robot
490*16467b97STreehugger Robot    def substring(self, start, stop):
491*16467b97STreehugger Robot        return self.strdata[start:stop+1]
492*16467b97STreehugger Robot
493*16467b97STreehugger Robot
494*16467b97STreehugger Robot    def getLine(self):
495*16467b97STreehugger Robot        """Using setter/getter methods is deprecated. Use o.line instead."""
496*16467b97STreehugger Robot        return self.line
497*16467b97STreehugger Robot
498*16467b97STreehugger Robot
499*16467b97STreehugger Robot    def getCharPositionInLine(self):
500*16467b97STreehugger Robot        """
501*16467b97STreehugger Robot        Using setter/getter methods is deprecated. Use o.charPositionInLine
502*16467b97STreehugger Robot        instead.
503*16467b97STreehugger Robot        """
504*16467b97STreehugger Robot        return self.charPositionInLine
505*16467b97STreehugger Robot
506*16467b97STreehugger Robot
507*16467b97STreehugger Robot    def setLine(self, line):
508*16467b97STreehugger Robot        """Using setter/getter methods is deprecated. Use o.line instead."""
509*16467b97STreehugger Robot        self.line = line
510*16467b97STreehugger Robot
511*16467b97STreehugger Robot
512*16467b97STreehugger Robot    def setCharPositionInLine(self, pos):
513*16467b97STreehugger Robot        """
514*16467b97STreehugger Robot        Using setter/getter methods is deprecated. Use o.charPositionInLine
515*16467b97STreehugger Robot        instead.
516*16467b97STreehugger Robot        """
517*16467b97STreehugger Robot        self.charPositionInLine = pos
518*16467b97STreehugger Robot
519*16467b97STreehugger Robot
520*16467b97STreehugger Robot    def getSourceName(self):
521*16467b97STreehugger Robot        return self.name
522*16467b97STreehugger Robot
523*16467b97STreehugger Robot
524*16467b97STreehugger Robotclass ANTLRFileStream(ANTLRStringStream):
525*16467b97STreehugger Robot    """
526*16467b97STreehugger Robot    @brief CharStream that opens a file to read the data.
527*16467b97STreehugger Robot
528*16467b97STreehugger Robot    This is a char buffer stream that is loaded from a file
529*16467b97STreehugger Robot    all at once when you construct the object.
530*16467b97STreehugger Robot    """
531*16467b97STreehugger Robot
532*16467b97STreehugger Robot    def __init__(self, fileName, encoding=None):
533*16467b97STreehugger Robot        """
534*16467b97STreehugger Robot        @param fileName The path to the file to be opened. The file will be
535*16467b97STreehugger Robot           opened with mode 'rb'.
536*16467b97STreehugger Robot
537*16467b97STreehugger Robot        @param encoding If you set the optional encoding argument, then the
538*16467b97STreehugger Robot           data will be decoded on the fly.
539*16467b97STreehugger Robot
540*16467b97STreehugger Robot        """
541*16467b97STreehugger Robot
542*16467b97STreehugger Robot        self.fileName = fileName
543*16467b97STreehugger Robot
544*16467b97STreehugger Robot        fp = codecs.open(fileName, 'rb', encoding)
545*16467b97STreehugger Robot        try:
546*16467b97STreehugger Robot            data = fp.read()
547*16467b97STreehugger Robot        finally:
548*16467b97STreehugger Robot            fp.close()
549*16467b97STreehugger Robot
550*16467b97STreehugger Robot        ANTLRStringStream.__init__(self, data)
551*16467b97STreehugger Robot
552*16467b97STreehugger Robot
553*16467b97STreehugger Robot    def getSourceName(self):
554*16467b97STreehugger Robot        """Deprecated, access o.fileName directly."""
555*16467b97STreehugger Robot
556*16467b97STreehugger Robot        return self.fileName
557*16467b97STreehugger Robot
558*16467b97STreehugger Robot
559*16467b97STreehugger Robotclass ANTLRInputStream(ANTLRStringStream):
560*16467b97STreehugger Robot    """
561*16467b97STreehugger Robot    @brief CharStream that reads data from a file-like object.
562*16467b97STreehugger Robot
563*16467b97STreehugger Robot    This is a char buffer stream that is loaded from a file like object
564*16467b97STreehugger Robot    all at once when you construct the object.
565*16467b97STreehugger Robot
566*16467b97STreehugger Robot    All input is consumed from the file, but it is not closed.
567*16467b97STreehugger Robot    """
568*16467b97STreehugger Robot
569*16467b97STreehugger Robot    def __init__(self, file, encoding=None):
570*16467b97STreehugger Robot        """
571*16467b97STreehugger Robot        @param file A file-like object holding your input. Only the read()
572*16467b97STreehugger Robot           method must be implemented.
573*16467b97STreehugger Robot
574*16467b97STreehugger Robot        @param encoding If you set the optional encoding argument, then the
575*16467b97STreehugger Robot           data will be decoded on the fly.
576*16467b97STreehugger Robot
577*16467b97STreehugger Robot        """
578*16467b97STreehugger Robot
579*16467b97STreehugger Robot        if encoding is not None:
580*16467b97STreehugger Robot            # wrap input in a decoding reader
581*16467b97STreehugger Robot            reader = codecs.lookup(encoding)[2]
582*16467b97STreehugger Robot            file = reader(file)
583*16467b97STreehugger Robot
584*16467b97STreehugger Robot        data = file.read()
585*16467b97STreehugger Robot
586*16467b97STreehugger Robot        ANTLRStringStream.__init__(self, data)
587*16467b97STreehugger Robot
588*16467b97STreehugger Robot
589*16467b97STreehugger Robot# I guess the ANTLR prefix exists only to avoid a name clash with some Java
590*16467b97STreehugger Robot# mumbojumbo. A plain "StringStream" looks better to me, which should be
591*16467b97STreehugger Robot# the preferred name in Python.
592*16467b97STreehugger RobotStringStream = ANTLRStringStream
593*16467b97STreehugger RobotFileStream = ANTLRFileStream
594*16467b97STreehugger RobotInputStream = ANTLRInputStream
595*16467b97STreehugger Robot
596*16467b97STreehugger Robot
597*16467b97STreehugger Robot############################################################################
598*16467b97STreehugger Robot#
599*16467b97STreehugger Robot# Token streams
600*16467b97STreehugger Robot#   TokenStream
601*16467b97STreehugger Robot#   +- CommonTokenStream
602*16467b97STreehugger Robot#   \- TokenRewriteStream
603*16467b97STreehugger Robot#
604*16467b97STreehugger Robot############################################################################
605*16467b97STreehugger Robot
606*16467b97STreehugger Robot
607*16467b97STreehugger Robotclass CommonTokenStream(TokenStream):
608*16467b97STreehugger Robot    """
609*16467b97STreehugger Robot    @brief The most common stream of tokens
610*16467b97STreehugger Robot
611*16467b97STreehugger Robot    The most common stream of tokens is one where every token is buffered up
612*16467b97STreehugger Robot    and tokens are prefiltered for a certain channel (the parser will only
613*16467b97STreehugger Robot    see these tokens and cannot change the filter channel number during the
614*16467b97STreehugger Robot    parse).
615*16467b97STreehugger Robot    """
616*16467b97STreehugger Robot
617*16467b97STreehugger Robot    def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
618*16467b97STreehugger Robot        """
619*16467b97STreehugger Robot        @param tokenSource A TokenSource instance (usually a Lexer) to pull
620*16467b97STreehugger Robot            the tokens from.
621*16467b97STreehugger Robot
622*16467b97STreehugger Robot        @param channel Skip tokens on any channel but this one; this is how we
623*16467b97STreehugger Robot            skip whitespace...
624*16467b97STreehugger Robot
625*16467b97STreehugger Robot        """
626*16467b97STreehugger Robot
627*16467b97STreehugger Robot        TokenStream.__init__(self)
628*16467b97STreehugger Robot
629*16467b97STreehugger Robot        self.tokenSource = tokenSource
630*16467b97STreehugger Robot
631*16467b97STreehugger Robot	# Record every single token pulled from the source so we can reproduce
632*16467b97STreehugger Robot        # chunks of it later.
633*16467b97STreehugger Robot        self.tokens = []
634*16467b97STreehugger Robot
635*16467b97STreehugger Robot	# Map<tokentype, channel> to override some Tokens' channel numbers
636*16467b97STreehugger Robot        self.channelOverrideMap = {}
637*16467b97STreehugger Robot
638*16467b97STreehugger Robot	# Set<tokentype>; discard any tokens with this type
639*16467b97STreehugger Robot        self.discardSet = set()
640*16467b97STreehugger Robot
641*16467b97STreehugger Robot	# Skip tokens on any channel but this one; this is how we skip
642*16467b97STreehugger Robot        # whitespace...
643*16467b97STreehugger Robot        self.channel = channel
644*16467b97STreehugger Robot
645*16467b97STreehugger Robot	# By default, track all incoming tokens
646*16467b97STreehugger Robot        self.discardOffChannelTokens = False
647*16467b97STreehugger Robot
648*16467b97STreehugger Robot	# The index into the tokens list of the current token (next token
649*16467b97STreehugger Robot        # to consume).  p==-1 indicates that the tokens list is empty
650*16467b97STreehugger Robot        self.p = -1
651*16467b97STreehugger Robot
652*16467b97STreehugger Robot        # Remember last marked position
653*16467b97STreehugger Robot        self.lastMarker = None
654*16467b97STreehugger Robot
655*16467b97STreehugger Robot        # how deep have we gone?
656*16467b97STreehugger Robot        self._range = -1
657*16467b97STreehugger Robot
658*16467b97STreehugger Robot
659*16467b97STreehugger Robot    def makeEOFToken(self):
660*16467b97STreehugger Robot        return self.tokenSource.makeEOFToken()
661*16467b97STreehugger Robot
662*16467b97STreehugger Robot
663*16467b97STreehugger Robot    def setTokenSource(self, tokenSource):
664*16467b97STreehugger Robot        """Reset this token stream by setting its token source."""
665*16467b97STreehugger Robot
666*16467b97STreehugger Robot        self.tokenSource = tokenSource
667*16467b97STreehugger Robot        self.tokens = []
668*16467b97STreehugger Robot        self.p = -1
669*16467b97STreehugger Robot        self.channel = DEFAULT_CHANNEL
670*16467b97STreehugger Robot
671*16467b97STreehugger Robot
672*16467b97STreehugger Robot    def reset(self):
673*16467b97STreehugger Robot        self.p = 0
674*16467b97STreehugger Robot        self.lastMarker = None
675*16467b97STreehugger Robot
676*16467b97STreehugger Robot
677*16467b97STreehugger Robot    def fillBuffer(self):
678*16467b97STreehugger Robot        """
679*16467b97STreehugger Robot        Load all tokens from the token source and put in tokens.
680*16467b97STreehugger Robot	This is done upon first LT request because you might want to
681*16467b97STreehugger Robot        set some token type / channel overrides before filling buffer.
682*16467b97STreehugger Robot        """
683*16467b97STreehugger Robot
684*16467b97STreehugger Robot
685*16467b97STreehugger Robot        index = 0
686*16467b97STreehugger Robot        t = self.tokenSource.nextToken()
687*16467b97STreehugger Robot        while t is not None and t.type != EOF:
688*16467b97STreehugger Robot            discard = False
689*16467b97STreehugger Robot
690*16467b97STreehugger Robot            if self.discardSet is not None and t.type in self.discardSet:
691*16467b97STreehugger Robot                discard = True
692*16467b97STreehugger Robot
693*16467b97STreehugger Robot            elif self.discardOffChannelTokens and t.channel != self.channel:
694*16467b97STreehugger Robot                discard = True
695*16467b97STreehugger Robot
696*16467b97STreehugger Robot            # is there a channel override for token type?
697*16467b97STreehugger Robot            try:
698*16467b97STreehugger Robot                overrideChannel = self.channelOverrideMap[t.type]
699*16467b97STreehugger Robot
700*16467b97STreehugger Robot            except KeyError:
701*16467b97STreehugger Robot                # no override for this type
702*16467b97STreehugger Robot                pass
703*16467b97STreehugger Robot
704*16467b97STreehugger Robot            else:
705*16467b97STreehugger Robot                if overrideChannel == self.channel:
706*16467b97STreehugger Robot                    t.channel = overrideChannel
707*16467b97STreehugger Robot                else:
708*16467b97STreehugger Robot                    discard = True
709*16467b97STreehugger Robot
710*16467b97STreehugger Robot            if not discard:
711*16467b97STreehugger Robot                t.index = index
712*16467b97STreehugger Robot                self.tokens.append(t)
713*16467b97STreehugger Robot                index += 1
714*16467b97STreehugger Robot
715*16467b97STreehugger Robot            t = self.tokenSource.nextToken()
716*16467b97STreehugger Robot
717*16467b97STreehugger Robot        # leave p pointing at first token on channel
718*16467b97STreehugger Robot        self.p = 0
719*16467b97STreehugger Robot        self.p = self.skipOffTokenChannels(self.p)
720*16467b97STreehugger Robot
721*16467b97STreehugger Robot
722*16467b97STreehugger Robot    def consume(self):
723*16467b97STreehugger Robot        """
724*16467b97STreehugger Robot        Move the input pointer to the next incoming token.  The stream
725*16467b97STreehugger Robot        must become active with LT(1) available.  consume() simply
726*16467b97STreehugger Robot        moves the input pointer so that LT(1) points at the next
727*16467b97STreehugger Robot        input symbol. Consume at least one token.
728*16467b97STreehugger Robot
729*16467b97STreehugger Robot        Walk past any token not on the channel the parser is listening to.
730*16467b97STreehugger Robot        """
731*16467b97STreehugger Robot
732*16467b97STreehugger Robot        if self.p < len(self.tokens):
733*16467b97STreehugger Robot            self.p += 1
734*16467b97STreehugger Robot
735*16467b97STreehugger Robot            self.p = self.skipOffTokenChannels(self.p) # leave p on valid token
736*16467b97STreehugger Robot
737*16467b97STreehugger Robot
738*16467b97STreehugger Robot    def skipOffTokenChannels(self, i):
739*16467b97STreehugger Robot        """
740*16467b97STreehugger Robot        Given a starting index, return the index of the first on-channel
741*16467b97STreehugger Robot        token.
742*16467b97STreehugger Robot        """
743*16467b97STreehugger Robot
744*16467b97STreehugger Robot        try:
745*16467b97STreehugger Robot            while self.tokens[i].channel != self.channel:
746*16467b97STreehugger Robot                i += 1
747*16467b97STreehugger Robot        except IndexError:
748*16467b97STreehugger Robot            # hit the end of token stream
749*16467b97STreehugger Robot            pass
750*16467b97STreehugger Robot
751*16467b97STreehugger Robot        return i
752*16467b97STreehugger Robot
753*16467b97STreehugger Robot
754*16467b97STreehugger Robot    def skipOffTokenChannelsReverse(self, i):
755*16467b97STreehugger Robot        while i >= 0 and self.tokens[i].channel != self.channel:
756*16467b97STreehugger Robot            i -= 1
757*16467b97STreehugger Robot
758*16467b97STreehugger Robot        return i
759*16467b97STreehugger Robot
760*16467b97STreehugger Robot
761*16467b97STreehugger Robot    def setTokenTypeChannel(self, ttype, channel):
762*16467b97STreehugger Robot        """
763*16467b97STreehugger Robot        A simple filter mechanism whereby you can tell this token stream
764*16467b97STreehugger Robot        to force all tokens of type ttype to be on channel.  For example,
765*16467b97STreehugger Robot        when interpreting, we cannot exec actions so we need to tell
766*16467b97STreehugger Robot        the stream to force all WS and NEWLINE to be a different, ignored
767*16467b97STreehugger Robot        channel.
768*16467b97STreehugger Robot	"""
769*16467b97STreehugger Robot
770*16467b97STreehugger Robot        self.channelOverrideMap[ttype] = channel
771*16467b97STreehugger Robot
772*16467b97STreehugger Robot
773*16467b97STreehugger Robot    def discardTokenType(self, ttype):
774*16467b97STreehugger Robot        self.discardSet.add(ttype)
775*16467b97STreehugger Robot
776*16467b97STreehugger Robot
777*16467b97STreehugger Robot    def getTokens(self, start=None, stop=None, types=None):
778*16467b97STreehugger Robot        """
779*16467b97STreehugger Robot        Given a start and stop index, return a list of all tokens in
780*16467b97STreehugger Robot        the token type set.  Return None if no tokens were found.  This
781*16467b97STreehugger Robot        method looks at both on and off channel tokens.
782*16467b97STreehugger Robot        """
783*16467b97STreehugger Robot
784*16467b97STreehugger Robot        if self.p == -1:
785*16467b97STreehugger Robot            self.fillBuffer()
786*16467b97STreehugger Robot
787*16467b97STreehugger Robot        if stop is None or stop > len(self.tokens):
788*16467b97STreehugger Robot            stop = len(self.tokens)
789*16467b97STreehugger Robot
790*16467b97STreehugger Robot        if start is None or stop < 0:
791*16467b97STreehugger Robot            start = 0
792*16467b97STreehugger Robot
793*16467b97STreehugger Robot        if start > stop:
794*16467b97STreehugger Robot            return None
795*16467b97STreehugger Robot
796*16467b97STreehugger Robot        if isinstance(types, (int, long)):
797*16467b97STreehugger Robot            # called with a single type, wrap into set
798*16467b97STreehugger Robot            types = set([types])
799*16467b97STreehugger Robot
800*16467b97STreehugger Robot        filteredTokens = [
801*16467b97STreehugger Robot            token for token in self.tokens[start:stop]
802*16467b97STreehugger Robot            if types is None or token.type in types
803*16467b97STreehugger Robot            ]
804*16467b97STreehugger Robot
805*16467b97STreehugger Robot        if len(filteredTokens) == 0:
806*16467b97STreehugger Robot            return None
807*16467b97STreehugger Robot
808*16467b97STreehugger Robot        return filteredTokens
809*16467b97STreehugger Robot
810*16467b97STreehugger Robot
811*16467b97STreehugger Robot    def LT(self, k):
812*16467b97STreehugger Robot        """
813*16467b97STreehugger Robot        Get the ith token from the current position 1..n where k=1 is the
814*16467b97STreehugger Robot        first symbol of lookahead.
815*16467b97STreehugger Robot        """
816*16467b97STreehugger Robot
817*16467b97STreehugger Robot        if self.p == -1:
818*16467b97STreehugger Robot            self.fillBuffer()
819*16467b97STreehugger Robot
820*16467b97STreehugger Robot        if k == 0:
821*16467b97STreehugger Robot            return None
822*16467b97STreehugger Robot
823*16467b97STreehugger Robot        if k < 0:
824*16467b97STreehugger Robot            return self.LB(-k)
825*16467b97STreehugger Robot
826*16467b97STreehugger Robot        i = self.p
827*16467b97STreehugger Robot        n = 1
828*16467b97STreehugger Robot        # find k good tokens
829*16467b97STreehugger Robot        while n < k:
830*16467b97STreehugger Robot            # skip off-channel tokens
831*16467b97STreehugger Robot            i = self.skipOffTokenChannels(i+1) # leave p on valid token
832*16467b97STreehugger Robot            n += 1
833*16467b97STreehugger Robot
834*16467b97STreehugger Robot        if i > self._range:
835*16467b97STreehugger Robot            self._range = i
836*16467b97STreehugger Robot
837*16467b97STreehugger Robot        try:
838*16467b97STreehugger Robot            return self.tokens[i]
839*16467b97STreehugger Robot        except IndexError:
840*16467b97STreehugger Robot            return self.makeEOFToken()
841*16467b97STreehugger Robot
842*16467b97STreehugger Robot
843*16467b97STreehugger Robot    def LB(self, k):
844*16467b97STreehugger Robot        """Look backwards k tokens on-channel tokens"""
845*16467b97STreehugger Robot
846*16467b97STreehugger Robot        if self.p == -1:
847*16467b97STreehugger Robot            self.fillBuffer()
848*16467b97STreehugger Robot
849*16467b97STreehugger Robot        if k == 0:
850*16467b97STreehugger Robot            return None
851*16467b97STreehugger Robot
852*16467b97STreehugger Robot        if self.p - k < 0:
853*16467b97STreehugger Robot            return None
854*16467b97STreehugger Robot
855*16467b97STreehugger Robot        i = self.p
856*16467b97STreehugger Robot        n = 1
857*16467b97STreehugger Robot        # find k good tokens looking backwards
858*16467b97STreehugger Robot        while n <= k:
859*16467b97STreehugger Robot            # skip off-channel tokens
860*16467b97STreehugger Robot            i = self.skipOffTokenChannelsReverse(i-1) # leave p on valid token
861*16467b97STreehugger Robot            n += 1
862*16467b97STreehugger Robot
863*16467b97STreehugger Robot        if i < 0:
864*16467b97STreehugger Robot            return None
865*16467b97STreehugger Robot
866*16467b97STreehugger Robot        return self.tokens[i]
867*16467b97STreehugger Robot
868*16467b97STreehugger Robot
869*16467b97STreehugger Robot    def get(self, i):
870*16467b97STreehugger Robot        """
871*16467b97STreehugger Robot        Return absolute token i; ignore which channel the tokens are on;
872*16467b97STreehugger Robot        that is, count all tokens not just on-channel tokens.
873*16467b97STreehugger Robot        """
874*16467b97STreehugger Robot
875*16467b97STreehugger Robot        return self.tokens[i]
876*16467b97STreehugger Robot
877*16467b97STreehugger Robot
878*16467b97STreehugger Robot    def slice(self, start, stop):
879*16467b97STreehugger Robot        if self.p == -1:
880*16467b97STreehugger Robot            self.fillBuffer()
881*16467b97STreehugger Robot
882*16467b97STreehugger Robot        if start < 0 or stop < 0:
883*16467b97STreehugger Robot            return None
884*16467b97STreehugger Robot
885*16467b97STreehugger Robot        return self.tokens[start:stop+1]
886*16467b97STreehugger Robot
887*16467b97STreehugger Robot
888*16467b97STreehugger Robot    def LA(self, i):
889*16467b97STreehugger Robot        return self.LT(i).type
890*16467b97STreehugger Robot
891*16467b97STreehugger Robot
892*16467b97STreehugger Robot    def mark(self):
893*16467b97STreehugger Robot        self.lastMarker = self.index()
894*16467b97STreehugger Robot        return self.lastMarker
895*16467b97STreehugger Robot
896*16467b97STreehugger Robot
897*16467b97STreehugger Robot    def release(self, marker=None):
898*16467b97STreehugger Robot        # no resources to release
899*16467b97STreehugger Robot        pass
900*16467b97STreehugger Robot
901*16467b97STreehugger Robot
902*16467b97STreehugger Robot    def size(self):
903*16467b97STreehugger Robot        return len(self.tokens)
904*16467b97STreehugger Robot
905*16467b97STreehugger Robot
906*16467b97STreehugger Robot    def range(self):
907*16467b97STreehugger Robot        return self._range
908*16467b97STreehugger Robot
909*16467b97STreehugger Robot
910*16467b97STreehugger Robot    def index(self):
911*16467b97STreehugger Robot        return self.p
912*16467b97STreehugger Robot
913*16467b97STreehugger Robot
914*16467b97STreehugger Robot    def rewind(self, marker=None):
915*16467b97STreehugger Robot        if marker is None:
916*16467b97STreehugger Robot            marker = self.lastMarker
917*16467b97STreehugger Robot
918*16467b97STreehugger Robot        self.seek(marker)
919*16467b97STreehugger Robot
920*16467b97STreehugger Robot
921*16467b97STreehugger Robot    def seek(self, index):
922*16467b97STreehugger Robot        self.p = index
923*16467b97STreehugger Robot
924*16467b97STreehugger Robot
925*16467b97STreehugger Robot    def getTokenSource(self):
926*16467b97STreehugger Robot        return self.tokenSource
927*16467b97STreehugger Robot
928*16467b97STreehugger Robot
929*16467b97STreehugger Robot    def getSourceName(self):
930*16467b97STreehugger Robot        return self.tokenSource.getSourceName()
931*16467b97STreehugger Robot
932*16467b97STreehugger Robot
933*16467b97STreehugger Robot    def toString(self, start=None, stop=None):
934*16467b97STreehugger Robot        if self.p == -1:
935*16467b97STreehugger Robot            self.fillBuffer()
936*16467b97STreehugger Robot
937*16467b97STreehugger Robot        if start is None:
938*16467b97STreehugger Robot            start = 0
939*16467b97STreehugger Robot        elif not isinstance(start, int):
940*16467b97STreehugger Robot            start = start.index
941*16467b97STreehugger Robot
942*16467b97STreehugger Robot        if stop is None:
943*16467b97STreehugger Robot            stop = len(self.tokens) - 1
944*16467b97STreehugger Robot        elif not isinstance(stop, int):
945*16467b97STreehugger Robot            stop = stop.index
946*16467b97STreehugger Robot
947*16467b97STreehugger Robot        if stop >= len(self.tokens):
948*16467b97STreehugger Robot            stop = len(self.tokens) - 1
949*16467b97STreehugger Robot
950*16467b97STreehugger Robot        return ''.join([t.text for t in self.tokens[start:stop+1]])
951*16467b97STreehugger Robot
952*16467b97STreehugger Robot
953*16467b97STreehugger Robotclass RewriteOperation(object):
954*16467b97STreehugger Robot    """@brief Internal helper class."""
955*16467b97STreehugger Robot
956*16467b97STreehugger Robot    def __init__(self, stream, index, text):
957*16467b97STreehugger Robot        self.stream = stream
958*16467b97STreehugger Robot
959*16467b97STreehugger Robot        # What index into rewrites List are we?
960*16467b97STreehugger Robot        self.instructionIndex = None
961*16467b97STreehugger Robot
962*16467b97STreehugger Robot        # Token buffer index.
963*16467b97STreehugger Robot        self.index = index
964*16467b97STreehugger Robot        self.text = text
965*16467b97STreehugger Robot
966*16467b97STreehugger Robot    def execute(self, buf):
967*16467b97STreehugger Robot        """Execute the rewrite operation by possibly adding to the buffer.
968*16467b97STreehugger Robot        Return the index of the next token to operate on.
969*16467b97STreehugger Robot        """
970*16467b97STreehugger Robot
971*16467b97STreehugger Robot        return self.index
972*16467b97STreehugger Robot
973*16467b97STreehugger Robot    def toString(self):
974*16467b97STreehugger Robot        opName = self.__class__.__name__
975*16467b97STreehugger Robot        return '<%s@%d:"%s">' % (
976*16467b97STreehugger Robot            opName, self.index, self.text)
977*16467b97STreehugger Robot
978*16467b97STreehugger Robot    __str__ = toString
979*16467b97STreehugger Robot    __repr__ = toString
980*16467b97STreehugger Robot
981*16467b97STreehugger Robot
982*16467b97STreehugger Robotclass InsertBeforeOp(RewriteOperation):
983*16467b97STreehugger Robot    """@brief Internal helper class."""
984*16467b97STreehugger Robot
985*16467b97STreehugger Robot    def execute(self, buf):
986*16467b97STreehugger Robot        buf.write(self.text)
987*16467b97STreehugger Robot        if self.stream.tokens[self.index].type != EOF:
988*16467b97STreehugger Robot            buf.write(self.stream.tokens[self.index].text)
989*16467b97STreehugger Robot        return self.index + 1
990*16467b97STreehugger Robot
991*16467b97STreehugger Robot
992*16467b97STreehugger Robotclass ReplaceOp(RewriteOperation):
993*16467b97STreehugger Robot    """
994*16467b97STreehugger Robot    @brief Internal helper class.
995*16467b97STreehugger Robot
996*16467b97STreehugger Robot    I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
997*16467b97STreehugger Robot    instructions.
998*16467b97STreehugger Robot    """
999*16467b97STreehugger Robot
1000*16467b97STreehugger Robot    def __init__(self, stream, first, last, text):
1001*16467b97STreehugger Robot        RewriteOperation.__init__(self, stream, first, text)
1002*16467b97STreehugger Robot        self.lastIndex = last
1003*16467b97STreehugger Robot
1004*16467b97STreehugger Robot
1005*16467b97STreehugger Robot    def execute(self, buf):
1006*16467b97STreehugger Robot        if self.text is not None:
1007*16467b97STreehugger Robot            buf.write(self.text)
1008*16467b97STreehugger Robot
1009*16467b97STreehugger Robot        return self.lastIndex + 1
1010*16467b97STreehugger Robot
1011*16467b97STreehugger Robot
1012*16467b97STreehugger Robot    def toString(self):
1013*16467b97STreehugger Robot        if self.text is None:
1014*16467b97STreehugger Robot            return '<DeleteOp@%d..%d>' % (self.index, self.lastIndex)
1015*16467b97STreehugger Robot
1016*16467b97STreehugger Robot        return '<ReplaceOp@%d..%d:"%s">' % (
1017*16467b97STreehugger Robot            self.index, self.lastIndex, self.text)
1018*16467b97STreehugger Robot
1019*16467b97STreehugger Robot    __str__ = toString
1020*16467b97STreehugger Robot    __repr__ = toString
1021*16467b97STreehugger Robot
1022*16467b97STreehugger Robot
1023*16467b97STreehugger Robotclass TokenRewriteStream(CommonTokenStream):
1024*16467b97STreehugger Robot    """@brief CommonTokenStream that can be modified.
1025*16467b97STreehugger Robot
1026*16467b97STreehugger Robot    Useful for dumping out the input stream after doing some
1027*16467b97STreehugger Robot    augmentation or other manipulations.
1028*16467b97STreehugger Robot
1029*16467b97STreehugger Robot    You can insert stuff, replace, and delete chunks.  Note that the
1030*16467b97STreehugger Robot    operations are done lazily--only if you convert the buffer to a
1031*16467b97STreehugger Robot    String.  This is very efficient because you are not moving data around
1032*16467b97STreehugger Robot    all the time.  As the buffer of tokens is converted to strings, the
1033*16467b97STreehugger Robot    toString() method(s) check to see if there is an operation at the
1034*16467b97STreehugger Robot    current index.  If so, the operation is done and then normal String
1035*16467b97STreehugger Robot    rendering continues on the buffer.  This is like having multiple Turing
1036*16467b97STreehugger Robot    machine instruction streams (programs) operating on a single input tape. :)
1037*16467b97STreehugger Robot
1038*16467b97STreehugger Robot    Since the operations are done lazily at toString-time, operations do not
1039*16467b97STreehugger Robot    screw up the token index values.  That is, an insert operation at token
1040*16467b97STreehugger Robot    index i does not change the index values for tokens i+1..n-1.
1041*16467b97STreehugger Robot
1042*16467b97STreehugger Robot    Because operations never actually alter the buffer, you may always get
1043*16467b97STreehugger Robot    the original token stream back without undoing anything.  Since
1044*16467b97STreehugger Robot    the instructions are queued up, you can easily simulate transactions and
1045*16467b97STreehugger Robot    roll back any changes if there is an error just by removing instructions.
1046*16467b97STreehugger Robot    For example,
1047*16467b97STreehugger Robot
1048*16467b97STreehugger Robot     CharStream input = new ANTLRFileStream("input");
1049*16467b97STreehugger Robot     TLexer lex = new TLexer(input);
1050*16467b97STreehugger Robot     TokenRewriteStream tokens = new TokenRewriteStream(lex);
1051*16467b97STreehugger Robot     T parser = new T(tokens);
1052*16467b97STreehugger Robot     parser.startRule();
1053*16467b97STreehugger Robot
1054*16467b97STreehugger Robot     Then in the rules, you can execute
1055*16467b97STreehugger Robot        Token t,u;
1056*16467b97STreehugger Robot        ...
1057*16467b97STreehugger Robot        input.insertAfter(t, "text to put after t");}
1058*16467b97STreehugger Robot        input.insertAfter(u, "text after u");}
1059*16467b97STreehugger Robot        System.out.println(tokens.toString());
1060*16467b97STreehugger Robot
1061*16467b97STreehugger Robot    Actually, you have to cast the 'input' to a TokenRewriteStream. :(
1062*16467b97STreehugger Robot
1063*16467b97STreehugger Robot    You can also have multiple "instruction streams" and get multiple
1064*16467b97STreehugger Robot    rewrites from a single pass over the input.  Just name the instruction
1065*16467b97STreehugger Robot    streams and use that name again when printing the buffer.  This could be
1066*16467b97STreehugger Robot    useful for generating a C file and also its header file--all from the
1067*16467b97STreehugger Robot    same buffer:
1068*16467b97STreehugger Robot
1069*16467b97STreehugger Robot        tokens.insertAfter("pass1", t, "text to put after t");}
1070*16467b97STreehugger Robot        tokens.insertAfter("pass2", u, "text after u");}
1071*16467b97STreehugger Robot        System.out.println(tokens.toString("pass1"));
1072*16467b97STreehugger Robot        System.out.println(tokens.toString("pass2"));
1073*16467b97STreehugger Robot
1074*16467b97STreehugger Robot    If you don't use named rewrite streams, a "default" stream is used as
1075*16467b97STreehugger Robot    the first example shows.
1076*16467b97STreehugger Robot    """
1077*16467b97STreehugger Robot
1078*16467b97STreehugger Robot    DEFAULT_PROGRAM_NAME = "default"
1079*16467b97STreehugger Robot    MIN_TOKEN_INDEX = 0
1080*16467b97STreehugger Robot
1081*16467b97STreehugger Robot    def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
1082*16467b97STreehugger Robot        CommonTokenStream.__init__(self, tokenSource, channel)
1083*16467b97STreehugger Robot
1084*16467b97STreehugger Robot        # You may have multiple, named streams of rewrite operations.
1085*16467b97STreehugger Robot        # I'm calling these things "programs."
1086*16467b97STreehugger Robot        #  Maps String (name) -> rewrite (List)
1087*16467b97STreehugger Robot        self.programs = {}
1088*16467b97STreehugger Robot        self.programs[self.DEFAULT_PROGRAM_NAME] = []
1089*16467b97STreehugger Robot
1090*16467b97STreehugger Robot 	# Map String (program name) -> Integer index
1091*16467b97STreehugger Robot        self.lastRewriteTokenIndexes = {}
1092*16467b97STreehugger Robot
1093*16467b97STreehugger Robot
1094*16467b97STreehugger Robot    def rollback(self, *args):
1095*16467b97STreehugger Robot        """
1096*16467b97STreehugger Robot        Rollback the instruction stream for a program so that
1097*16467b97STreehugger Robot        the indicated instruction (via instructionIndex) is no
1098*16467b97STreehugger Robot        longer in the stream.  UNTESTED!
1099*16467b97STreehugger Robot        """
1100*16467b97STreehugger Robot
1101*16467b97STreehugger Robot        if len(args) == 2:
1102*16467b97STreehugger Robot            programName = args[0]
1103*16467b97STreehugger Robot            instructionIndex = args[1]
1104*16467b97STreehugger Robot        elif len(args) == 1:
1105*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1106*16467b97STreehugger Robot            instructionIndex = args[0]
1107*16467b97STreehugger Robot        else:
1108*16467b97STreehugger Robot            raise TypeError("Invalid arguments")
1109*16467b97STreehugger Robot
1110*16467b97STreehugger Robot        p = self.programs.get(programName, None)
1111*16467b97STreehugger Robot        if p is not None:
1112*16467b97STreehugger Robot            self.programs[programName] = (
1113*16467b97STreehugger Robot                p[self.MIN_TOKEN_INDEX:instructionIndex])
1114*16467b97STreehugger Robot
1115*16467b97STreehugger Robot
1116*16467b97STreehugger Robot    def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME):
1117*16467b97STreehugger Robot        """Reset the program so that no instructions exist"""
1118*16467b97STreehugger Robot
1119*16467b97STreehugger Robot        self.rollback(programName, self.MIN_TOKEN_INDEX)
1120*16467b97STreehugger Robot
1121*16467b97STreehugger Robot
1122*16467b97STreehugger Robot    def insertAfter(self, *args):
1123*16467b97STreehugger Robot        if len(args) == 2:
1124*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1125*16467b97STreehugger Robot            index = args[0]
1126*16467b97STreehugger Robot            text = args[1]
1127*16467b97STreehugger Robot
1128*16467b97STreehugger Robot        elif len(args) == 3:
1129*16467b97STreehugger Robot            programName = args[0]
1130*16467b97STreehugger Robot            index = args[1]
1131*16467b97STreehugger Robot            text = args[2]
1132*16467b97STreehugger Robot
1133*16467b97STreehugger Robot        else:
1134*16467b97STreehugger Robot            raise TypeError("Invalid arguments")
1135*16467b97STreehugger Robot
1136*16467b97STreehugger Robot        if isinstance(index, Token):
1137*16467b97STreehugger Robot            # index is a Token, grap the stream index from it
1138*16467b97STreehugger Robot            index = index.index
1139*16467b97STreehugger Robot
1140*16467b97STreehugger Robot        # to insert after, just insert before next index (even if past end)
1141*16467b97STreehugger Robot        self.insertBefore(programName, index+1, text)
1142*16467b97STreehugger Robot
1143*16467b97STreehugger Robot
1144*16467b97STreehugger Robot    def insertBefore(self, *args):
1145*16467b97STreehugger Robot        if len(args) == 2:
1146*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1147*16467b97STreehugger Robot            index = args[0]
1148*16467b97STreehugger Robot            text = args[1]
1149*16467b97STreehugger Robot
1150*16467b97STreehugger Robot        elif len(args) == 3:
1151*16467b97STreehugger Robot            programName = args[0]
1152*16467b97STreehugger Robot            index = args[1]
1153*16467b97STreehugger Robot            text = args[2]
1154*16467b97STreehugger Robot
1155*16467b97STreehugger Robot        else:
1156*16467b97STreehugger Robot            raise TypeError("Invalid arguments")
1157*16467b97STreehugger Robot
1158*16467b97STreehugger Robot        if isinstance(index, Token):
1159*16467b97STreehugger Robot            # index is a Token, grap the stream index from it
1160*16467b97STreehugger Robot            index = index.index
1161*16467b97STreehugger Robot
1162*16467b97STreehugger Robot        op = InsertBeforeOp(self, index, text)
1163*16467b97STreehugger Robot        rewrites = self.getProgram(programName)
1164*16467b97STreehugger Robot        op.instructionIndex = len(rewrites)
1165*16467b97STreehugger Robot        rewrites.append(op)
1166*16467b97STreehugger Robot
1167*16467b97STreehugger Robot
1168*16467b97STreehugger Robot    def replace(self, *args):
1169*16467b97STreehugger Robot        if len(args) == 2:
1170*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1171*16467b97STreehugger Robot            first = args[0]
1172*16467b97STreehugger Robot            last = args[0]
1173*16467b97STreehugger Robot            text = args[1]
1174*16467b97STreehugger Robot
1175*16467b97STreehugger Robot        elif len(args) == 3:
1176*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1177*16467b97STreehugger Robot            first = args[0]
1178*16467b97STreehugger Robot            last = args[1]
1179*16467b97STreehugger Robot            text = args[2]
1180*16467b97STreehugger Robot
1181*16467b97STreehugger Robot        elif len(args) == 4:
1182*16467b97STreehugger Robot            programName = args[0]
1183*16467b97STreehugger Robot            first = args[1]
1184*16467b97STreehugger Robot            last = args[2]
1185*16467b97STreehugger Robot            text = args[3]
1186*16467b97STreehugger Robot
1187*16467b97STreehugger Robot        else:
1188*16467b97STreehugger Robot            raise TypeError("Invalid arguments")
1189*16467b97STreehugger Robot
1190*16467b97STreehugger Robot        if isinstance(first, Token):
1191*16467b97STreehugger Robot            # first is a Token, grap the stream index from it
1192*16467b97STreehugger Robot            first = first.index
1193*16467b97STreehugger Robot
1194*16467b97STreehugger Robot        if isinstance(last, Token):
1195*16467b97STreehugger Robot            # last is a Token, grap the stream index from it
1196*16467b97STreehugger Robot            last = last.index
1197*16467b97STreehugger Robot
1198*16467b97STreehugger Robot        if first > last or first < 0 or last < 0 or last >= len(self.tokens):
1199*16467b97STreehugger Robot            raise ValueError(
1200*16467b97STreehugger Robot                "replace: range invalid: %d..%d (size=%d)"
1201*16467b97STreehugger Robot                % (first, last, len(self.tokens)))
1202*16467b97STreehugger Robot
1203*16467b97STreehugger Robot        op = ReplaceOp(self, first, last, text)
1204*16467b97STreehugger Robot        rewrites = self.getProgram(programName)
1205*16467b97STreehugger Robot        op.instructionIndex = len(rewrites)
1206*16467b97STreehugger Robot        rewrites.append(op)
1207*16467b97STreehugger Robot
1208*16467b97STreehugger Robot
1209*16467b97STreehugger Robot    def delete(self, *args):
1210*16467b97STreehugger Robot        self.replace(*(list(args) + [None]))
1211*16467b97STreehugger Robot
1212*16467b97STreehugger Robot
1213*16467b97STreehugger Robot    def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME):
1214*16467b97STreehugger Robot        return self.lastRewriteTokenIndexes.get(programName, -1)
1215*16467b97STreehugger Robot
1216*16467b97STreehugger Robot
1217*16467b97STreehugger Robot    def setLastRewriteTokenIndex(self, programName, i):
1218*16467b97STreehugger Robot        self.lastRewriteTokenIndexes[programName] = i
1219*16467b97STreehugger Robot
1220*16467b97STreehugger Robot
1221*16467b97STreehugger Robot    def getProgram(self, name):
1222*16467b97STreehugger Robot        p = self.programs.get(name, None)
1223*16467b97STreehugger Robot        if p is  None:
1224*16467b97STreehugger Robot            p = self.initializeProgram(name)
1225*16467b97STreehugger Robot
1226*16467b97STreehugger Robot        return p
1227*16467b97STreehugger Robot
1228*16467b97STreehugger Robot
1229*16467b97STreehugger Robot    def initializeProgram(self, name):
1230*16467b97STreehugger Robot        p = []
1231*16467b97STreehugger Robot        self.programs[name] = p
1232*16467b97STreehugger Robot        return p
1233*16467b97STreehugger Robot
1234*16467b97STreehugger Robot
1235*16467b97STreehugger Robot    def toOriginalString(self, start=None, end=None):
1236*16467b97STreehugger Robot        if self.p == -1:
1237*16467b97STreehugger Robot            self.fillBuffer()
1238*16467b97STreehugger Robot
1239*16467b97STreehugger Robot        if start is None:
1240*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1241*16467b97STreehugger Robot        if end is None:
1242*16467b97STreehugger Robot            end = self.size() - 1
1243*16467b97STreehugger Robot
1244*16467b97STreehugger Robot        buf = StringIO()
1245*16467b97STreehugger Robot        i = start
1246*16467b97STreehugger Robot        while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
1247*16467b97STreehugger Robot            if self.get(i).type != EOF:
1248*16467b97STreehugger Robot                buf.write(self.get(i).text)
1249*16467b97STreehugger Robot            i += 1
1250*16467b97STreehugger Robot
1251*16467b97STreehugger Robot        return buf.getvalue()
1252*16467b97STreehugger Robot
1253*16467b97STreehugger Robot
1254*16467b97STreehugger Robot    def toString(self, *args):
1255*16467b97STreehugger Robot        if self.p == -1:
1256*16467b97STreehugger Robot            self.fillBuffer()
1257*16467b97STreehugger Robot
1258*16467b97STreehugger Robot        if len(args) == 0:
1259*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1260*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1261*16467b97STreehugger Robot            end = self.size() - 1
1262*16467b97STreehugger Robot
1263*16467b97STreehugger Robot        elif len(args) == 1:
1264*16467b97STreehugger Robot            programName = args[0]
1265*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1266*16467b97STreehugger Robot            end = self.size() - 1
1267*16467b97STreehugger Robot
1268*16467b97STreehugger Robot        elif len(args) == 2:
1269*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1270*16467b97STreehugger Robot            start = args[0]
1271*16467b97STreehugger Robot            end = args[1]
1272*16467b97STreehugger Robot
1273*16467b97STreehugger Robot        if start is None:
1274*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1275*16467b97STreehugger Robot        elif not isinstance(start, int):
1276*16467b97STreehugger Robot            start = start.index
1277*16467b97STreehugger Robot
1278*16467b97STreehugger Robot        if end is None:
1279*16467b97STreehugger Robot            end = len(self.tokens) - 1
1280*16467b97STreehugger Robot        elif not isinstance(end, int):
1281*16467b97STreehugger Robot            end = end.index
1282*16467b97STreehugger Robot
1283*16467b97STreehugger Robot        # ensure start/end are in range
1284*16467b97STreehugger Robot        if end >= len(self.tokens):
1285*16467b97STreehugger Robot            end = len(self.tokens) - 1
1286*16467b97STreehugger Robot
1287*16467b97STreehugger Robot        if start < 0:
1288*16467b97STreehugger Robot            start = 0
1289*16467b97STreehugger Robot
1290*16467b97STreehugger Robot        rewrites = self.programs.get(programName)
1291*16467b97STreehugger Robot        if rewrites is None or len(rewrites) == 0:
1292*16467b97STreehugger Robot            # no instructions to execute
1293*16467b97STreehugger Robot            return self.toOriginalString(start, end)
1294*16467b97STreehugger Robot
1295*16467b97STreehugger Robot        buf = StringIO()
1296*16467b97STreehugger Robot
1297*16467b97STreehugger Robot        # First, optimize instruction stream
1298*16467b97STreehugger Robot        indexToOp = self.reduceToSingleOperationPerIndex(rewrites)
1299*16467b97STreehugger Robot
1300*16467b97STreehugger Robot        # Walk buffer, executing instructions and emitting tokens
1301*16467b97STreehugger Robot        i = start
1302*16467b97STreehugger Robot        while i <= end and i < len(self.tokens):
1303*16467b97STreehugger Robot            op = indexToOp.get(i)
1304*16467b97STreehugger Robot            # remove so any left have index size-1
1305*16467b97STreehugger Robot            try:
1306*16467b97STreehugger Robot                del indexToOp[i]
1307*16467b97STreehugger Robot            except KeyError:
1308*16467b97STreehugger Robot                pass
1309*16467b97STreehugger Robot
1310*16467b97STreehugger Robot            t = self.tokens[i]
1311*16467b97STreehugger Robot            if op is None:
1312*16467b97STreehugger Robot                # no operation at that index, just dump token
1313*16467b97STreehugger Robot                if t.type != EOF:
1314*16467b97STreehugger Robot                    buf.write(t.text)
1315*16467b97STreehugger Robot                i += 1 # move to next token
1316*16467b97STreehugger Robot
1317*16467b97STreehugger Robot            else:
1318*16467b97STreehugger Robot                i = op.execute(buf) # execute operation and skip
1319*16467b97STreehugger Robot
1320*16467b97STreehugger Robot        # include stuff after end if it's last index in buffer
1321*16467b97STreehugger Robot        # So, if they did an insertAfter(lastValidIndex, "foo"), include
1322*16467b97STreehugger Robot        # foo if end==lastValidIndex.
1323*16467b97STreehugger Robot        if end == len(self.tokens) - 1:
1324*16467b97STreehugger Robot            # Scan any remaining operations after last token
1325*16467b97STreehugger Robot            # should be included (they will be inserts).
1326*16467b97STreehugger Robot            for i in sorted(indexToOp.keys()):
1327*16467b97STreehugger Robot                op = indexToOp[i]
1328*16467b97STreehugger Robot                if op.index >= len(self.tokens)-1:
1329*16467b97STreehugger Robot                    buf.write(op.text)
1330*16467b97STreehugger Robot
1331*16467b97STreehugger Robot        return buf.getvalue()
1332*16467b97STreehugger Robot
1333*16467b97STreehugger Robot    __str__ = toString
1334*16467b97STreehugger Robot
1335*16467b97STreehugger Robot
1336*16467b97STreehugger Robot    def reduceToSingleOperationPerIndex(self, rewrites):
1337*16467b97STreehugger Robot        """
1338*16467b97STreehugger Robot        We need to combine operations and report invalid operations (like
1339*16467b97STreehugger Robot        overlapping replaces that are not completed nested).  Inserts to
1340*16467b97STreehugger Robot        same index need to be combined etc...   Here are the cases:
1341*16467b97STreehugger Robot
1342*16467b97STreehugger Robot        I.i.u I.j.v                           leave alone, nonoverlapping
1343*16467b97STreehugger Robot        I.i.u I.i.v                           combine: Iivu
1344*16467b97STreehugger Robot
1345*16467b97STreehugger Robot        R.i-j.u R.x-y.v | i-j in x-y          delete first R
1346*16467b97STreehugger Robot        R.i-j.u R.i-j.v                       delete first R
1347*16467b97STreehugger Robot        R.i-j.u R.x-y.v | x-y in i-j          ERROR
1348*16467b97STreehugger Robot        R.i-j.u R.x-y.v | boundaries overlap  ERROR
1349*16467b97STreehugger Robot
1350*16467b97STreehugger Robot        Delete special case of replace (text==null):
1351*16467b97STreehugger Robot        D.i-j.u D.x-y.v |                     boundaries overlapcombine to
1352*16467b97STreehugger Robot                                              max(min)..max(right)
1353*16467b97STreehugger Robot
1354*16467b97STreehugger Robot        I.i.u R.x-y.v   |                     i in (x+1)-ydelete I (since
1355*16467b97STreehugger Robot                                              insert before we're not deleting
1356*16467b97STreehugger Robot                                              i)
1357*16467b97STreehugger Robot        I.i.u R.x-y.v   |                     i not in (x+1)-yleave alone,
1358*16467b97STreehugger Robot                                              nonoverlapping
1359*16467b97STreehugger Robot
1360*16467b97STreehugger Robot        R.x-y.v I.i.u   | i in x-y            ERROR
1361*16467b97STreehugger Robot        R.x-y.v I.x.u                         R.x-y.uv (combine, delete I)
1362*16467b97STreehugger Robot        R.x-y.v I.i.u   | i not in x-y        leave alone, nonoverlapping
1363*16467b97STreehugger Robot
1364*16467b97STreehugger Robot        I.i.u = insert u before op @ index i
1365*16467b97STreehugger Robot        R.x-y.u = replace x-y indexed tokens with u
1366*16467b97STreehugger Robot
1367*16467b97STreehugger Robot        First we need to examine replaces.  For any replace op:
1368*16467b97STreehugger Robot
1369*16467b97STreehugger Robot          1. wipe out any insertions before op within that range.
1370*16467b97STreehugger Robot          2. Drop any replace op before that is contained completely within
1371*16467b97STreehugger Robot             that range.
1372*16467b97STreehugger Robot          3. Throw exception upon boundary overlap with any previous replace.
1373*16467b97STreehugger Robot
1374*16467b97STreehugger Robot        Then we can deal with inserts:
1375*16467b97STreehugger Robot
1376*16467b97STreehugger Robot          1. for any inserts to same index, combine even if not adjacent.
1377*16467b97STreehugger Robot          2. for any prior replace with same left boundary, combine this
1378*16467b97STreehugger Robot             insert with replace and delete this replace.
1379*16467b97STreehugger Robot          3. throw exception if index in same range as previous replace
1380*16467b97STreehugger Robot
1381*16467b97STreehugger Robot        Don't actually delete; make op null in list. Easier to walk list.
1382*16467b97STreehugger Robot        Later we can throw as we add to index -> op map.
1383*16467b97STreehugger Robot
1384*16467b97STreehugger Robot        Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
1385*16467b97STreehugger Robot        inserted stuff would be before the replace range.  But, if you
1386*16467b97STreehugger Robot        add tokens in front of a method body '{' and then delete the method
1387*16467b97STreehugger Robot        body, I think the stuff before the '{' you added should disappear too.
1388*16467b97STreehugger Robot
1389*16467b97STreehugger Robot        Return a map from token index to operation.
1390*16467b97STreehugger Robot        """
1391*16467b97STreehugger Robot
1392*16467b97STreehugger Robot        # WALK REPLACES
1393*16467b97STreehugger Robot        for i, rop in enumerate(rewrites):
1394*16467b97STreehugger Robot            if rop is None:
1395*16467b97STreehugger Robot                continue
1396*16467b97STreehugger Robot
1397*16467b97STreehugger Robot            if not isinstance(rop, ReplaceOp):
1398*16467b97STreehugger Robot                continue
1399*16467b97STreehugger Robot
1400*16467b97STreehugger Robot            # Wipe prior inserts within range
1401*16467b97STreehugger Robot            for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
1402*16467b97STreehugger Robot                if iop.index == rop.index:
1403*16467b97STreehugger Robot                    # E.g., insert before 2, delete 2..2; update replace
1404*16467b97STreehugger Robot                    # text to include insert before, kill insert
1405*16467b97STreehugger Robot                    rewrites[iop.instructionIndex] = None
1406*16467b97STreehugger Robot                    rop.text = self.catOpText(iop.text, rop.text)
1407*16467b97STreehugger Robot
1408*16467b97STreehugger Robot                elif iop.index > rop.index and iop.index <= rop.lastIndex:
1409*16467b97STreehugger Robot                    # delete insert as it's a no-op.
1410*16467b97STreehugger Robot                    rewrites[j] = None
1411*16467b97STreehugger Robot
1412*16467b97STreehugger Robot            # Drop any prior replaces contained within
1413*16467b97STreehugger Robot            for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i):
1414*16467b97STreehugger Robot                if (prevRop.index >= rop.index
1415*16467b97STreehugger Robot                    and prevRop.lastIndex <= rop.lastIndex):
1416*16467b97STreehugger Robot                    # delete replace as it's a no-op.
1417*16467b97STreehugger Robot                    rewrites[j] = None
1418*16467b97STreehugger Robot                    continue
1419*16467b97STreehugger Robot
1420*16467b97STreehugger Robot                # throw exception unless disjoint or identical
1421*16467b97STreehugger Robot                disjoint = (prevRop.lastIndex < rop.index
1422*16467b97STreehugger Robot                            or prevRop.index > rop.lastIndex)
1423*16467b97STreehugger Robot                same = (prevRop.index == rop.index
1424*16467b97STreehugger Robot                        and prevRop.lastIndex == rop.lastIndex)
1425*16467b97STreehugger Robot
1426*16467b97STreehugger Robot                # Delete special case of replace (text==null):
1427*16467b97STreehugger Robot                # D.i-j.u D.x-y.v| boundaries overlapcombine to
1428*16467b97STreehugger Robot                # max(min)..max(right)
1429*16467b97STreehugger Robot                if prevRop.text is None and rop.text is None and not disjoint:
1430*16467b97STreehugger Robot                    # kill first delete
1431*16467b97STreehugger Robot                    rewrites[prevRop.instructionIndex] = None
1432*16467b97STreehugger Robot
1433*16467b97STreehugger Robot                    rop.index = min(prevRop.index, rop.index)
1434*16467b97STreehugger Robot                    rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex)
1435*16467b97STreehugger Robot
1436*16467b97STreehugger Robot                elif not disjoint and not same:
1437*16467b97STreehugger Robot                    raise ValueError(
1438*16467b97STreehugger Robot                        "replace op boundaries of %s overlap with previous %s"
1439*16467b97STreehugger Robot                        % (rop, prevRop))
1440*16467b97STreehugger Robot
1441*16467b97STreehugger Robot        # WALK INSERTS
1442*16467b97STreehugger Robot        for i, iop in enumerate(rewrites):
1443*16467b97STreehugger Robot            if iop is None:
1444*16467b97STreehugger Robot                continue
1445*16467b97STreehugger Robot
1446*16467b97STreehugger Robot            if not isinstance(iop, InsertBeforeOp):
1447*16467b97STreehugger Robot                continue
1448*16467b97STreehugger Robot
1449*16467b97STreehugger Robot            # combine current insert with prior if any at same index
1450*16467b97STreehugger Robot            for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
1451*16467b97STreehugger Robot                if prevIop.index == iop.index: # combine objects
1452*16467b97STreehugger Robot                    # convert to strings...we're in process of toString'ing
1453*16467b97STreehugger Robot                    # whole token buffer so no lazy eval issue with any
1454*16467b97STreehugger Robot                    # templates
1455*16467b97STreehugger Robot                    iop.text = self.catOpText(iop.text, prevIop.text)
1456*16467b97STreehugger Robot                    # delete redundant prior insert
1457*16467b97STreehugger Robot                    rewrites[j] = None
1458*16467b97STreehugger Robot
1459*16467b97STreehugger Robot            # look for replaces where iop.index is in range; error
1460*16467b97STreehugger Robot            for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i):
1461*16467b97STreehugger Robot                if iop.index == rop.index:
1462*16467b97STreehugger Robot                    rop.text = self.catOpText(iop.text, rop.text)
1463*16467b97STreehugger Robot                    # delete current insert
1464*16467b97STreehugger Robot                    rewrites[i] = None
1465*16467b97STreehugger Robot                    continue
1466*16467b97STreehugger Robot
1467*16467b97STreehugger Robot                if iop.index >= rop.index and iop.index <= rop.lastIndex:
1468*16467b97STreehugger Robot                    raise ValueError(
1469*16467b97STreehugger Robot                        "insert op %s within boundaries of previous %s"
1470*16467b97STreehugger Robot                        % (iop, rop))
1471*16467b97STreehugger Robot
1472*16467b97STreehugger Robot        m = {}
1473*16467b97STreehugger Robot        for i, op in enumerate(rewrites):
1474*16467b97STreehugger Robot            if op is None:
1475*16467b97STreehugger Robot                # ignore deleted ops
1476*16467b97STreehugger Robot                continue
1477*16467b97STreehugger Robot
1478*16467b97STreehugger Robot            assert op.index not in m, "should only be one op per index"
1479*16467b97STreehugger Robot            m[op.index] = op
1480*16467b97STreehugger Robot
1481*16467b97STreehugger Robot        return m
1482*16467b97STreehugger Robot
1483*16467b97STreehugger Robot
1484*16467b97STreehugger Robot    def catOpText(self, a, b):
1485*16467b97STreehugger Robot        x = ""
1486*16467b97STreehugger Robot        y = ""
1487*16467b97STreehugger Robot        if a is not None:
1488*16467b97STreehugger Robot            x = a
1489*16467b97STreehugger Robot        if b is not None:
1490*16467b97STreehugger Robot            y = b
1491*16467b97STreehugger Robot        return x + y
1492*16467b97STreehugger Robot
1493*16467b97STreehugger Robot
1494*16467b97STreehugger Robot    def getKindOfOps(self, rewrites, kind, before=None):
1495*16467b97STreehugger Robot        """Get all operations before an index of a particular kind."""
1496*16467b97STreehugger Robot
1497*16467b97STreehugger Robot        if before is None:
1498*16467b97STreehugger Robot            before = len(rewrites)
1499*16467b97STreehugger Robot        elif before > len(rewrites):
1500*16467b97STreehugger Robot            before = len(rewrites)
1501*16467b97STreehugger Robot
1502*16467b97STreehugger Robot        for i, op in enumerate(rewrites[:before]):
1503*16467b97STreehugger Robot            if op is None:
1504*16467b97STreehugger Robot                # ignore deleted
1505*16467b97STreehugger Robot                continue
1506*16467b97STreehugger Robot            if op.__class__ == kind:
1507*16467b97STreehugger Robot                yield i, op
1508*16467b97STreehugger Robot
1509*16467b97STreehugger Robot
1510*16467b97STreehugger Robot    def toDebugString(self, start=None, end=None):
1511*16467b97STreehugger Robot        if start is None:
1512*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1513*16467b97STreehugger Robot        if end is None:
1514*16467b97STreehugger Robot            end = self.size() - 1
1515*16467b97STreehugger Robot
1516*16467b97STreehugger Robot        buf = StringIO()
1517*16467b97STreehugger Robot        i = start
1518*16467b97STreehugger Robot        while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
1519*16467b97STreehugger Robot            buf.write(self.get(i))
1520*16467b97STreehugger Robot            i += 1
1521*16467b97STreehugger Robot
1522*16467b97STreehugger Robot        return buf.getvalue()
1523