diff options
Diffstat (limited to 'cvs2svn_rcsparse/default.py')
-rw-r--r-- | cvs2svn_rcsparse/default.py | 172 |
1 files changed, 172 insertions, 0 deletions
# -*-python-*-
#
# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------

import string

import common


class _TokenStream:
    """Lexical scanner for RCS files.

    Splits the underlying file object into the tokens of RCS syntax:
    bare words, the ';' and ':' separators, and '@'-delimited strings
    (returned with the delimiters stripped and '@@' unescaped to a
    single '@').  The file is consumed in large chunks for speed.
    """

    # Characters that terminate a bare (non-string) token.
    token_term = frozenset(string.whitespace + ';:')

    # the algorithm is about the same speed for any CHUNK_SIZE chosen.
    # grab a good-sized chunk, but not too large to overwhelm memory.
    # note: we use a multiple of a standard block size
    CHUNK_SIZE = 192 * 512  # about 100k

#    CHUNK_SIZE = 5  # for debugging, make the function grind...

    def __init__(self, file):
        # file: an open file object positioned at the start of an RCS file
        self.rcsfile = file
        self.idx = 0
        self.buf = self.rcsfile.read(self.CHUNK_SIZE)
        if self.buf == '':
            # a completely empty RCS file is malformed
            # (was: "raise RuntimeError, 'EOF'" -- Python-2-only syntax)
            raise RuntimeError('EOF')

    def get(self):
        """Get the next token from the RCS file.

        Returns None at end of file.  Raises RuntimeError('EOF') if the
        file ends in the middle of an '@'-quoted string.
        """
        # Note: we can afford to loop within Python, examining individual
        # characters.  For the whitespace and tokens, the number of
        # iterations is typically quite small.  Thus, a simple iterative
        # loop will beat out more complex solutions.

        buf = self.buf
        lbuf = len(buf)
        idx = self.idx

        # Skip leading whitespace, refilling the buffer as needed.
        while 1:
            if idx == lbuf:
                buf = self.rcsfile.read(self.CHUNK_SIZE)
                if buf == '':
                    # signal EOF by returning None as the token
                    del self.buf  # so we fail if get() is called again
                    return None
                lbuf = len(buf)
                idx = 0

            if buf[idx] not in string.whitespace:
                break

            idx = idx + 1

        # Single-character separator tokens.
        if buf[idx] in ';:':
            self.buf = buf
            self.idx = idx + 1
            return buf[idx]

        if buf[idx] != '@':
            # A bare word: accumulate characters (possibly across chunk
            # boundaries) until a terminator is seen.
            end = idx + 1
            token = ''
            while 1:
                # find token characters in the current buffer
                while end < lbuf and buf[end] not in self.token_term:
                    end = end + 1
                token = token + buf[idx:end]

                if end < lbuf:
                    # we stopped before the end, so we have a full token
                    idx = end
                    break

                # we stopped at the end of the buffer, so we may have a
                # partial token; read more and keep going
                buf = self.rcsfile.read(self.CHUNK_SIZE)
                lbuf = len(buf)
                idx = end = 0

            self.buf = buf
            self.idx = idx
            return token

        # a "string" which starts with the "@" character.  we'll skip it
        # when we search for content.
        idx = idx + 1

        chunks = []

        while 1:
            if idx == lbuf:
                idx = 0
                buf = self.rcsfile.read(self.CHUNK_SIZE)
                if buf == '':
                    # EOF inside an '@'-string is malformed
                    raise RuntimeError('EOF')
                lbuf = len(buf)
            # (was: "string.find(buf, '@', idx)" -- the deprecated
            # string-module function form; the method is equivalent)
            i = buf.find('@', idx)
            if i == -1:
                # no '@' in the rest of this chunk: it is all content
                chunks.append(buf[idx:])
                idx = lbuf
                continue
            if i == lbuf - 1:
                # '@' is the chunk's last character: we cannot yet tell
                # whether it is an escape ('@@') or the terminator, so
                # carry it over into the next chunk and retry
                chunks.append(buf[idx:i])
                idx = 0
                buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
                if buf == '@':
                    raise RuntimeError('EOF')
                lbuf = len(buf)
                continue
            if buf[i + 1] == '@':
                # '@@' is an escaped '@': keep a single '@' and continue
                chunks.append(buf[idx:i + 1])
                idx = i + 2
                continue

            # a lone '@' terminates the string
            chunks.append(buf[idx:i])

            self.buf = buf
            self.idx = i + 1

            return ''.join(chunks)

    # Debugging aid: uncomment to trace every token handed out.
    # (These lines appeared de-commented in the reviewed copy, which
    # would be a syntax error at class level; restored as comments.)
#    _get = get
#    def get(self):
#        token = self._get()
#        print 'T:', `token`
#        return token

    def match(self, match):
        """Read the next token and verify that it equals MATCH.

        Raises common.RCSExpected otherwise.
        """
        token = self.get()
        if token != match:
            raise common.RCSExpected(token, match)

    def unget(self, token):
        "Put this token back, for the next get() to return."

        # Override the class' .get method with a function which clears the
        # overridden method then returns the pushed token.  Since this
        # function will not be looked up via the class mechanism, it should
        # be a "normal" function, meaning it won't have "self" automatically
        # inserted.  Therefore, we need to pass both self and the token thru
        # via defaults.

        # note: we don't put this into the input buffer because it may have
        # been @-unescaped already.

        def give_it_back(self=self, token=token):
            del self.get
            return token

        self.get = give_it_back

    def mget(self, count):
        "Return multiple tokens. 'next' is at the end."
        result = []
        for i in range(count):
            result.append(self.get())
        result.reverse()
        return result


class Parser(common._Parser):
    # RCS parser wired to the chunked pure-Python token stream above.
    stream_class = _TokenStream