changeset 34:08e2157aaa9a

Remove local fork of bzr-fastimport; use my fastimport library instead.
author Greg Ward <greg-hg@gerg.ca>
date Tue, 05 May 2009 11:28:53 -0400
parents 31b9a5805f02
children 233040cea97a
files hgfastimport/__init__.py hgfastimport/commands.py hgfastimport/dates.py hgfastimport/errors.py hgfastimport/hgechoprocessor.py hgfastimport/hgimport.py hgfastimport/parser.py hgfastimport/processor.py
diffstat 8 files changed, 19 insertions(+), 1146 deletions(-) [+]
line wrap: on
line diff
--- a/hgfastimport/__init__.py	Tue May 05 10:27:27 2009 -0400
+++ b/hgfastimport/__init__.py	Tue May 05 11:28:53 2009 -0400
@@ -1,6 +1,6 @@
 from mercurial import commands
 
-import parser
+from fastimport import parser
 import hgechoprocessor
 import hgimport
 
@@ -13,7 +13,7 @@
             ui.write("Reading source: %s\n" % source)
             f = open(source)
             p = parser.ImportParser(f)
-            proc._process(p.iter_commands)
+            proc.process(p.iter_commands)
             f.close()
     finally:
         proc.teardown()
--- a/hgfastimport/commands.py	Tue May 05 10:27:27 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,216 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Import command classes."""
-
-
-# Lists of command names
-COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag']
-FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
-    'filedeleteall']
-
-# Bazaar file kinds
-FILE_KIND = 'file'
-SYMLINK_KIND = 'symlink'
-
-
-class ImportCommand(object):
-    """Base class for import commands."""
-
-    def __init__(self, name):
-        self.name = name
-        # List of field names not to display
-        self._binary = []
-
-    def __repr__(self):
-        return "<%s at %x: %s>" % (self.__class__.__name__, id(self), self)
-
-    def __str__(self):
-        return self.name
-
-    def dump_str(self, names=None, child_lists=None, verbose=False):
-        """Dump fields as a string.
-
-        :param names: the list of fields to include or
-            None for all public fields
-        :param child_lists: dictionary of child command names to
-            fields for that child command to include
-        :param verbose: if True, prefix each line with the command class and
-            display fields as a dictionary; if False, dump just the field
-            values with tabs between them
-        """
-        interesting = {}
-        if names is None:
-            fields = [k for k in self.__dict__.keys() if not k.startswith('_')]
-        else:
-            fields = names
-        for field in fields:
-            value = self.__dict__.get(field)
-            if field in self._binary and value is not None:
-                value = '(...)'
-            interesting[field] = value
-        if verbose:
-            return "%s: %s" % (self.__class__.__name__, interesting)
-        else:
-            return "\t".join([str(interesting[k]) for k in fields])
-
-
-class BlobCommand(ImportCommand):
-
-    def __init__(self, mark, data, lineno=0):
-        ImportCommand.__init__(self, 'blob')
-        self.mark = mark
-        self.data = data
-        self.lineno = lineno
-        # Provide a unique id in case the mark is missing
-        if mark is None:
-            self.id = '@%d' % lineno
-        else:
-            self.id = ':' + mark
-        self._binary = ['data']
-
-    def __str__(self):
-        return self.id
-
-
-class CheckpointCommand(ImportCommand):
-
-    def __init__(self):
-        ImportCommand.__init__(self, 'checkpoint')
-
-
-class CommitCommand(ImportCommand):
-
-    def __init__(self, ref, mark, author, committer, message, from_,
-        parents, file_iter, lineno=0):
-        ImportCommand.__init__(self, 'commit')
-        self.ref = ref
-        self.mark = mark
-        self.author = author
-        self.committer = committer
-        self.message = message
-        self.from_ = from_
-        self.parents = parents
-        self.file_iter = file_iter
-        self.lineno = lineno
-        self._binary = ['file_iter']
-        # Provide a unique id in case the mark is missing
-        if mark is None:
-            self.id = '@%d' % lineno
-        else:
-            self.id = ':' + mark
-
-    def __str__(self):
-        return "ref %s, mark %s" % (self.ref, self.mark)
-
-    def dump_str(self, names=None, child_lists=None, verbose=False):
-        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
-        for f in self.file_iter():
-            if child_lists is None:
-                continue
-            try:
-                child_names = child_lists[f.name]
-            except KeyError:
-                continue
-            result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
-        return '\n'.join(result)
-
-
-class ProgressCommand(ImportCommand):
-
-    def __init__(self, message):
-        ImportCommand.__init__(self, 'progress')
-        self.message = message
-
-
-class ResetCommand(ImportCommand):
-
-    def __init__(self, ref, from_):
-        ImportCommand.__init__(self, 'reset')
-        self.ref = ref
-        self.from_ = from_
-
-
-class TagCommand(ImportCommand):
-
-    def __init__(self, id, from_, tagger, message):
-        ImportCommand.__init__(self, 'tag')
-        self.id = id
-        self.from_ = from_
-        self.tagger = tagger
-        self.message = message
-
-    def __str__(self):
-        return self.id
-
-
-class FileCommand(ImportCommand):
-    """Base class for file commands."""
-    pass
-
-
-class FileModifyCommand(FileCommand):
-
-    def __init__(self, path, kind, is_executable, dataref, data):
-        # Either dataref or data should be null
-        FileCommand.__init__(self, 'filemodify')
-        self.path = path
-        self.kind = kind
-        self.is_executable = is_executable
-        self.dataref = dataref
-        self.data = data
-        self._binary = ['data']
-
-    def __str__(self):
-        return self.path
-
-
-class FileDeleteCommand(FileCommand):
-
-    def __init__(self, path):
-        FileCommand.__init__(self, 'filedelete')
-        self.path = path
-
-    def __str__(self):
-        return self.path
-
-
-class FileCopyCommand(FileCommand):
-
-    def __init__(self, src_path, dest_path):
-        FileCommand.__init__(self, 'filecopy')
-        self.src_path = src_path
-        self.dest_path = dest_path
-
-    def __str__(self):
-        return "%s -> %s" % (self.src_path, self.dest_path)
-
-
-class FileRenameCommand(FileCommand):
-
-    def __init__(self, old_path, new_path):
-        FileCommand.__init__(self, 'filerename')
-        self.old_path = old_path
-        self.new_path = new_path
-
-    def __str__(self):
-        return "%s -> %s" % (self.old_path, self.new_path)
-
-
-class FileDeleteAllCommand(FileCommand):
-
-    def __init__(self):
-        FileCommand.__init__(self, 'filedeleteall')
--- a/hgfastimport/dates.py	Tue May 05 10:27:27 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Date parsing routines.
-
-Each routine returns timestamp,timezone where
-
-* timestamp is seconds since epoch
-* timezone is the offset from UTC in seconds.
-"""
-
-
-import time
-
-
-def parse_raw(s):
-    """Parse a date from a raw string.
-    
-    The format must be exactly "seconds-since-epoch offset-utc".
-    See the spec for details.
-    """
-    timestamp_str, timezone_str = s.split(' ', 1)
-    timestamp = float(timestamp_str)
-    timezone = _parse_tz(timezone_str)
-    return timestamp, timezone
-
-
-def _parse_tz(tz):
-    """Parse a timezone specification in the [+|-]HHMM format.
-
-    :return: the timezone offset in seconds.
-    """
-    # from git_repository.py in bzr-git
-    assert len(tz) == 5
-    sign = {'+': +1, '-': -1}[tz[0]]
-    hours = int(tz[1:3])
-    minutes = int(tz[3:])
-    return sign * 60 * (60 * hours + minutes)
-
-
-def parse_rfc2822(s):
-    """Parse a date from a rfc2822 string.
-    
-    See the spec for details.
-    """
-    raise NotImplementedError(parse_rfc2822)
-
-
-def parse_now(s):
-    """Parse a date from a string.
-
-    The format must be exactly "now".
-    See the spec for details.
-    """
-    return time.time(), 0
-
-
-# Lookup tabel of date parsing routines
-DATE_PARSERS_BY_NAME = {
-    'raw':      parse_raw,
-    'rfc2822':  parse_rfc2822,
-    'now':      parse_now,
-    }
--- a/hgfastimport/errors.py	Tue May 05 10:27:27 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Exception classes for fastimport"""
-
-#from bzrlib import errors as bzr_errors
-
-
-# Prefix to messages to show location information
-_LOCATION_FMT = "line %(lineno)d: "
-
-class FmtException(StandardError):
-    def __str__(self):
-        return repr(self)
-
-    def __repr__(self):
-        return self._fmt % self.__dict__
-
-class ImportError(FmtException):
-    """The base exception class for all import processing exceptions."""
-
-    _fmt = "Unknown Import Error"
-
-
-class ParsingError(ImportError):
-    """The base exception class for all import processing exceptions."""
-
-    _fmt = _LOCATION_FMT + "Unknown Import Parsing Error"
-
-    def __init__(self, lineno):
-        ImportError.__init__(self)
-        self.lineno = lineno
-
-
-class MissingBytes(ParsingError):
-    """Raised when EOF encountered while expecting to find more bytes."""
-
-    _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes,"
-        " found %(found)d")
-
-    def __init__(self, lineno, expected, found):
-        ParsingError.__init__(self, lineno)
-        self.expected = expected
-        self.found = found
-
-
-class MissingTerminator(ParsingError):
-    """Raised when EOF encountered while expecting to find a terminator."""
-
-    _fmt = (_LOCATION_FMT +
-        "Unexpected EOF - expected '%(terminator)s' terminator")
-
-    def __init__(self, lineno, terminator):
-        ParsingError.__init__(self, lineno)
-        self.terminator = terminator
-
-
-class InvalidCommand(ParsingError):
-    """Raised when an unknown command found."""
-
-    _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'")
-
-    def __init__(self, lineno, cmd):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-
-
-class MissingSection(ParsingError):
-    """Raised when a section is required in a command but not present."""
-
-    _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s")
-
-    def __init__(self, lineno, cmd, section):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-        self.section = section
-
-
-class BadFormat(ParsingError):
-    """Raised when a section is formatted incorrectly."""
-
-    _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in "
-        "command %(cmd)s: found '%(text)s'")
-
-    def __init__(self, lineno, cmd, section, text):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-        self.section = section
-        self.text = text
-
-
-class InvalidTimezone(ParsingError):
-    """Raised when converting a string timezone to a seconds offset."""
-
-    _fmt = (_LOCATION_FMT +
-        "Timezone %(timezone)r could not be converted.%(reason)s")
-
-    def __init__(self, lineno, timezone, reason=None):
-        ParsingError.__init__(self, lineno)
-        self.timezone = timezone
-        if reason:
-            self.reason = ' ' + reason
-        else:
-            self.reason = ''
-
-
-class UnknownDateFormat(ImportError):
-    """Raised when an unknown date format is given."""
-
-    _fmt = ("Unknown date format '%(format)s'")
-
-    def __init__(self, format):
-        ImportError.__init__(self)
-        self.format = format
-
-
-class MissingHandler(ImportError):
-    """Raised when a processor can't handle a command."""
-
-    _fmt = ("Missing handler for command %(cmd)s")
-
-    def __init__(self, cmd):
-        ImportError.__init__(self)
-        self.cmd = cmd
-
-
-class UnknownParameter(ImportError):
-    """Raised when an unknown parameter is passed to a processor."""
-
-    _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")
-
-    def __init__(self, param, knowns):
-        ImportError.__init__(self)
-        self.param = param
-        self.knowns = knowns
-
-
-class BadRepositorySize(ImportError):
-    """Raised when the repository has an incorrect number of revisions."""
-
-    _fmt = ("Bad repository size - %(found)d revisions found, "
-        "%(expected)d expected")
-
-    def __init__(self, expected, found):
-        ImportError.__init__(self)
-        self.expected = expected
-        self.found = found
-
-
-class BadRestart(ImportError):
-    """Raised when the import stream and id-map do not match up."""
-
-    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
-        "but matching revision-id is unknown")
-
-    def __init__(self, commit_id):
-        ImportError.__init__(self)
-        self.commit_id = commit_id
--- a/hgfastimport/hgechoprocessor.py	Tue May 05 10:27:27 2009 -0400
+++ b/hgfastimport/hgechoprocessor.py	Tue May 05 11:28:53 2009 -0400
@@ -20,8 +20,7 @@
 for basing real processors on. See the processors package for examples.
 """
 
-
-import processor
+from fastimport import processor
 
 class HgEchoProcessor(processor.ImportProcessor):
     
--- a/hgfastimport/hgimport.py	Tue May 05 10:27:27 2009 -0400
+++ b/hgfastimport/hgimport.py	Tue May 05 11:28:53 2009 -0400
@@ -29,9 +29,9 @@
 import mercurial.commands
 from mercurial import util
 from mercurial.node import nullrev
-import processor
 
-import hgechoprocessor
+from fastimport import processor
+from hgfastimport import hgechoprocessor
 
 class HgImportProcessor(processor.ImportProcessor):
     
@@ -49,7 +49,12 @@
         self.numblobs = 0               # for progress reporting
         self.blobdir = None
 
+    def setup(self):
+        """Setup before processing any streams."""
+        pass
+
     def teardown(self):
+        """Cleanup after processing all streams."""
         if self.blobdir and os.path.exists(self.blobdir):
             self.ui.status("Removing blob dir %r ...\n" % self.blobdir)
             shutil.rmtree(self.blobdir)
@@ -104,11 +109,11 @@
         # Update to the first parent
         mercurial.hg.clean(self.repo, self.repo.lookup(first_parent))
         #self.ui.write("Bing\n")
-        if cmd.parents:
+        if cmd.merges:
             #self.ui.write("foo")
-            if len(cmd.parents) > 1:
+            if len(cmd.merges) > 1:
                 raise NotImplementedError("Can't handle more than two parents")
-            second_parent = self.committish_rev(cmd.parents[0])
+            second_parent = self.committish_rev(cmd.merges[0])
             #self.ui.write("Second parent: %s\n" % second_parent)
             mercurial.commands.debugsetparents(self.ui, self.repo, 
                 first_parent, second_parent)
@@ -132,10 +137,12 @@
         # optional)
         userinfo = cmd.author or cmd.committer
         user = "%s <%s>" % (userinfo[0], userinfo[1])
-        node = self.repo.rawcommit(files = commit_handler.filelist(),
-            text = cmd.message,
-            user = user,
-            date = self.convert_date(userinfo))
+
+        # XXX is this the right way to specify filename encoding?!?
+        files = [f.encode("utf-8") for f in commit_handler.filelist()]
+        date = self.convert_date(userinfo)
+        node = self.repo.rawcommit(
+            files=files, text=cmd.message, user=user, date=date)
         rev = self.repo.changelog.rev(node)
         if cmd.mark is not None:
             self.mark_map[":" + cmd.mark] = rev
--- a/hgfastimport/parser.py	Tue May 05 10:27:27 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,507 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Parser of import data into command objects.
-
-In order to reuse existing front-ends, the stream format is a subset of
-the one used by git-fast-import (as of the 1.5.4 release of git at least).
-The grammar is:
-
-  stream ::= cmd*;
-
-  cmd ::= new_blob
-        | new_commit
-        | new_tag
-        | reset_branch
-        | checkpoint
-        | progress
-        ;
-
-  new_blob ::= 'blob' lf
-    mark?
-    file_content;
-  file_content ::= data;
-
-  new_commit ::= 'commit' sp ref_str lf
-    mark?
-    ('author' sp name '<' email '>' when lf)?
-    'committer' sp name '<' email '>' when lf
-    commit_msg
-    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
-    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
-    file_change*
-    lf?;
-  commit_msg ::= data;
-
-  file_change ::= file_clr
-    | file_del
-    | file_rnm
-    | file_cpy
-    | file_obm
-    | file_inm;
-  file_clr ::= 'deleteall' lf;
-  file_del ::= 'D' sp path_str lf;
-  file_rnm ::= 'R' sp path_str sp path_str lf;
-  file_cpy ::= 'C' sp path_str sp path_str lf;
-  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
-  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
-    data;
-
-  new_tag ::= 'tag' sp tag_str lf
-    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
-    'tagger' sp name '<' email '>' when lf
-    tag_msg;
-  tag_msg ::= data;
-
-  reset_branch ::= 'reset' sp ref_str lf
-    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
-    lf?;
-
-  checkpoint ::= 'checkpoint' lf
-    lf?;
-
-  progress ::= 'progress' sp not_lf* lf
-    lf?;
-
-     # note: the first idnum in a stream should be 1 and subsequent
-     # idnums should not have gaps between values as this will cause
-     # the stream parser to reserve space for the gapped values.  An
-     # idnum can be updated in the future to a new object by issuing
-     # a new mark directive with the old idnum.
-     #
-  mark ::= 'mark' sp idnum lf;
-  data ::= (delimited_data | exact_data)
-    lf?;
-
-    # note: delim may be any string but must not contain lf.
-    # data_line may contain any data but must not be exactly
-    # delim.
-  delimited_data ::= 'data' sp '<<' delim lf
-    (data_line lf)*
-    delim lf;
-
-     # note: declen indicates the length of binary_data in bytes.
-     # declen does not include the lf preceeding the binary data.
-     #
-  exact_data ::= 'data' sp declen lf
-    binary_data;
-
-     # note: quoted strings are C-style quoting supporting \c for
-     # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
-     # is the signed byte value in octal.  Note that the only
-     # characters which must actually be escaped to protect the
-     # stream formatting is: \, \" and LF.  Otherwise these values
-     # are UTF8.
-     #
-  ref_str     ::= ref;
-  sha1exp_str ::= sha1exp;
-  tag_str     ::= tag;
-  path_str    ::= path    | '"' quoted(path)    '"' ;
-  mode        ::= '100644' | '644'
-                | '100755' | '755'
-                | '120000'
-                ;
-
-  declen ::= # unsigned 32 bit value, ascii base10 notation;
-  bigint ::= # unsigned integer value, ascii base10 notation;
-  binary_data ::= # file content, not interpreted;
-
-  when         ::= raw_when | rfc2822_when;
-  raw_when     ::= ts sp tz;
-  rfc2822_when ::= # Valid RFC 2822 date and time;
-
-  sp ::= # ASCII space character;
-  lf ::= # ASCII newline (LF) character;
-
-     # note: a colon (':') must precede the numerical value assigned to
-     # an idnum.  This is to distinguish it from a ref or tag name as
-     # GIT does not permit ':' in ref or tag strings.
-     #
-  idnum   ::= ':' bigint;
-  path    ::= # GIT style file path, e.g. "a/b/c";
-  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
-  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
-  sha1exp ::= # Any valid GIT SHA1 expression;
-  hexsha1 ::= # SHA1 in hexadecimal format;
-
-     # note: name and email are UTF8 strings, however name must not
-     # contain '<' or lf and email must not contain any of the
-     # following: '<', '>', lf.
-     #
-  name  ::= # valid GIT author/committer name;
-  email ::= # valid GIT author/committer email;
-  ts    ::= # time since the epoch in seconds, ascii base10 notation;
-  tz    ::= # GIT style timezone;
-
-     # note: comments may appear anywhere in the input, except
-     # within a data command.  Any form of the data command
-     # always escapes the related input from comment processing.
-     #
-     # In case it is not clear, the '#' that starts the comment
-     # must be the first character on that the line (an lf have
-     # preceeded it).
-     #
-  comment ::= '#' not_lf* lf;
-  not_lf  ::= # Any byte that is not ASCII newline (LF);
-"""
-
-
-import re
-import sys
-
-import commands
-import dates
-import errors
-
-
-## Stream parsing ##
-
-class LineBasedParser(object):
-
-    def __init__(self, input):
-        """A Parser that keeps track of line numbers.
-
-        :param input: the file-like object to read from
-        """
-        self.input = input
-        self.lineno = 0
-        # Lines pushed back onto the input stream
-        self._buffer = []
-
-    def abort(self, exception, *args):
-        """Raise an exception providing line number information."""
-        raise exception(self.lineno, *args)
-
-    def readline(self):
-        """Get the next line including the newline or '' on EOF."""
-        self.lineno += 1
-        if self._buffer:
-            return self._buffer.pop()
-        else:
-            return self.input.readline()
-
-    def next_line(self):
-        """Get the next line without the newline or None on EOF."""
-        line = self.readline()
-        if line:
-            return line[:-1]
-        else:
-            return None
-
-    def push_line(self, line):
-        """Push line back onto the line buffer.
-        
-        :param line: the line with no trailing newline
-        """
-        self.lineno -= 1
-        self._buffer.append(line + "\n")
-
-    def read_bytes(self, count):
-        """Read a given number of bytes from the input stream.
-        
-        Throws MissingBytes if the bytes are not found.
-
-        Note: This method does not read from the line buffer.
-
-        :return: a string
-        """
-        lines = []
-        left = count
-        found = 0
-        while left > 0:
-            line = self.input.readline(left)
-            if line:
-                line_len = len(line)
-                left -= line_len
-                found += line_len
-                lines.append(line)
-                if line.endswith('\n'):
-                    self.lineno += 1
-            else:
-                left = 0
-        if found != count:
-            self.abort(errors.MissingBytes, count, found)
-        return ''.join(lines)
-
-    def read_until(self, terminator):
-        """Read the input stream until the terminator is found.
-        
-        Throws MissingTerminator if the terminator is not found.
-
-        Note: This method does not read from the line buffer.
-
-        :return: the bytes read up to but excluding the terminator.
-        """
-        raise NotImplementedError(self.read_until)
-
-
-# Regular expression used for parsing. (Note: The spec states that the name
-# part should be non-empty, but git-fast-export doesn't always do that.)
-_WHO_AND_WHEN_RE = re.compile(r'([^\<\n]+) <([^\>\n]+)> (.+)')
-
-
-class ImportParser(LineBasedParser):
-
-    def __init__(self, input, verbose=False, output=sys.stdout):
-        """A Parser of import commands.
-
-        :param input: the file-like object to read from
-        :param verbose: display extra information of not
-        :param output: the file-like object to write messages to (YAGNI?)
-        """
-        LineBasedParser.__init__(self, input)
-        self.verbose = verbose
-        self.output = output
-        # We auto-detect the date format when a date is first encountered
-        self.date_parser = None
-
-    def iter_commands(self):
-        """Iterator returning ImportCommand objects."""
-        while True:
-            line = self.next_line()
-            if line is None:
-                break
-            elif len(line) == 0 or line.startswith('#'):
-                continue
-            # Search for commands in order of likelihood
-            elif line.startswith('commit '):
-                yield self._parse_commit(line[len('commit '):])
-            elif line.startswith('blob'):
-                yield self._parse_blob()
-            elif line.startswith('progress '):
-                yield commands.ProgressCommand(line[len('progress '):])
-            elif line.startswith('reset '):
-                yield self._parse_reset(line[len('reset '):])
-            elif line.startswith('tag '):
-                yield self._parse_tag(line[len('tag '):])
-            elif line.startswith('checkpoint'):
-                yield commands.CheckpointCommand()
-            else:
-                print line
-                self.abort(errors.InvalidCommand, line)
-
-    def iter_file_commands(self):
-        """Iterator returning FileCommand objects.
-        
-        If an invalid file command is found, the line is silently
-        pushed back and iteration ends.
-        """
-        while True:
-            line = self.next_line()
-            if line is None:
-                break
-            elif len(line) == 0 or line.startswith('#'):
-                continue
-            # Search for file commands in order of likelihood
-            elif line.startswith('M '):
-                yield self._parse_file_modify(line[2:])
-            elif line.startswith('D '):
-                path = self._path(line[2:])
-                yield commands.FileDeleteCommand(path)
-            elif line.startswith('R '):
-                old, new = self._path_pair(line[2:])
-                yield commands.FileRenameCommand(old, new)
-            elif line.startswith('C '):
-                src, dest = self._path_pair(line[2:])
-                yield commands.FileRenameCommand(src, dest)
-            elif line.startswith('deleteall'):
-                yield commands.FileDeleteAllCommand()
-            else:
-                self.push_line(line)
-                break
-
-    def _parse_blob(self):
-        """Parse a blob command."""
-        lineno = self.lineno
-        mark = self._get_mark_if_any()
-        data = self._get_data('blob')
-        return commands.BlobCommand(mark, data, lineno)
-
-    def _parse_commit(self, ref):
-        """Parse a commit command."""
-        lineno  = self.lineno
-        mark = self._get_mark_if_any()
-        author = self._get_user_info('commit', 'author', False)
-        committer = self._get_user_info('commit', 'committer')
-        message = self._get_data('commit', 'message')
-        from_ = self._get_from()
-        parents = []
-        while True:
-            merge = self._get_merge()
-            if merge is not None:
-                parents.append(merge)
-            else:
-                break
-        return commands.CommitCommand(ref, mark, author, committer, message, from_,
-            parents, self.iter_file_commands, lineno)
-
-    def _parse_file_modify(self, info):
-        """Parse a filemodify command within a commit.
-
-        :param info: a string in the format "mode dataref path"
-          (where dataref might be the hard-coded literal 'inline').
-        """
-        params = info.split(' ', 2)
-        path = self._path(params[2])
-        is_executable, is_symlink = self._mode(params[0])
-        if is_symlink:
-            kind = commands.SYMLINK_KIND
-        else:
-            kind = commands.FILE_KIND
-        if params[1] == 'inline':
-            dataref = None
-            data = self._get_data('filemodify')
-        else:
-            dataref = params[1]
-            data = None
-        return commands.FileModifyCommand(path, kind, is_executable, dataref,
-            data)
-
-    def _parse_reset(self, ref):
-        """Parse a reset command."""
-        from_ = self._get_from()
-        return commands.ResetCommand(ref, from_)
-
-    def _parse_tag(self, name):
-        """Parse a tag command."""
-        from_ = self._get_from('tag')
-        tagger = self._get_user_info('tag', 'tagger')
-        message = self._get_data('tag', 'message')
-        return commands.TagCommand(name, from_, tagger, message)
-
-    def _get_mark_if_any(self):
-        """Parse a mark section."""
-        line = self.next_line()
-        if line.startswith('mark :'):
-            return line[len('mark :'):]
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_from(self, required_for=None):
-        """Parse a from section."""
-        line = self.next_line()
-        if line.startswith('from '):
-            return line[len('from '):]
-        elif required_for:
-            self.abort(errors.MissingSection, required_for, 'from')
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_merge(self):
-        """Parse a merge section."""
-        line = self.next_line()
-        if line is None:                # EOF after last "merge" line
-            return None
-        elif line.startswith('merge '):
-            return line[len('merge '):]
-        else:
-            #print "not a merge:", line
-            self.push_line(line)
-            return None
-
-    def _get_user_info(self, cmd, section, required=True):
-        """Parse a user section."""
-        line = self.next_line()
-        if line.startswith(section + ' '):
-            return self._who_when(line[len(section + ' '):], cmd, section)
-        elif required:
-            self.abort(errors.MissingSection, cmd, section)
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_data(self, required_for, section='data'):
-        """Parse a data section."""
-        line = self.next_line()
-        if line.startswith('data '):
-            rest = line[len('data '):]
-            if rest.startswith('<<'):
-                return self.read_until(rest[2:])
-            else:
-                size = int(rest)
-                result = self.read_bytes(size)
-                # optional LF after data.
-                next = self.input.readline()
-                self.lineno += 1
-                if len(next) > 1 or next != "\n":
-                    self.push_line(next[:-1])
-                return result
-        else:
-            self.abort(errors.MissingSection, required_for, section)
-
-    def _who_when(self, s, cmd, section):
-        """Parse who and when information from a string.
-        
-        :return: a tuple of (name,email,timestamp,timezone)
-        """
-        match = _WHO_AND_WHEN_RE.search(s)
-        if match:
-            datestr = match.group(3)
-            if self.date_parser is None:
-                # auto-detect the date format
-                if len(datestr.split(' ')) == 2:
-                    format = 'raw'
-                elif datestr == 'now':
-                    format = 'now'
-                else:
-                    format = 'rfc2822'
-                self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
-            when = self.date_parser(datestr)
-            return (match.group(1),match.group(2),when[0],when[1])
-        else:
-            self.abort(errors.BadFormat, cmd, section, s)
-
-    def _path(self, s):
-        """Parse a path."""
-        if s.startswith('"'):
-            if s[-1] != '"':
-                self.abort(errors.BadFormat)
-            else:
-                s = _unquote_c_string(s[1:-1])
-        # Check path for sanity
-        sp = s.split("/")
-        if "" in sp or ".." in sp:
-            self.abort(errors.BadFormat)
-        return s
-
-    def _path_pair(self, s):
-        """Parse two paths separated by a space."""
-        # TODO: handle a space in the first path
-        parts = s.split(' ', 1)
-        return map(_unquote_c_string, parts)
-
-    def _mode(self, s):
-        """Parse a file mode into executable and symlink flags.
-        
-        :return (is_executable, is_symlink)
-        """
-        # Note: Output from git-fast-export slightly different to spec
-        if s in ['644', '100644', '0100644']:
-            return False, False
-        elif s in ['755', '100755', '0100755']:
-            return True, False
-        elif s in ['120000', '0120000']:
-            return False, True
-        else:
-            self.abort(errors.BadFormat, 'filemodify', 'mode', s)
-
-
-def _unquote_c_string(s):
-    """replace C-style escape sequences (\n, \", etc.) with real chars."""
-    # HACK: Python strings are close enough
-    return s.decode('string_escape', 'replace')
--- a/hgfastimport/processor.py	Tue May 05 10:27:27 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,163 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Processor of import commands.
-
-This module provides core processing functionality including an abstract class
-for basing real processors on. See the processors package for examples.
-"""
-
-
-#from bzrlib.errors import NotBranchError
-import errors
-
-
-class ImportProcessor(object):
-    """Base class for import processors.
-    
-    Subclasses should override the pre_*, post_* and *_handler
-    methods as appropriate.
-    """
-    
-    # XXX this is useless now that we process multiple input streams:
-    # we only want to call setup() and teardown() once for all of them!
-    def process(self, command_iter):
-        """Process the stream of commands.
-
-        :param command_iter: an iterator providing commands
-        """
-        raise RuntimeError("hey! who's calling this?!?")
-        self.setup()
-        try:
-            self._process(command_iter)
-        finally:
-            self.teardown()
-
-    def _process(self, command_iter):
-        self.pre_process()
-        for cmd in command_iter():
-            #print cmd.dump_str(verbose=True)
-            #print "starting"
-            try:
-                #print cmd.name
-                handler = self.__class__.__dict__[cmd.name + "_handler"]
-            except KeyError:
-                raise errors.MissingHandler(cmd.name)
-            else:
-                self.pre_handler(cmd)
-                handler(self, cmd)
-                self.post_handler(cmd)
-            if self.finished:
-                break
-            #print "around again"
-        self.post_process()
-
-    def setup(self):
-        pass
-    
-    def teardown(self):
-        pass
-        
-    def pre_process(self):
-        """Hook for logic at start of processing."""
-        pass
-
-    def post_process(self):
-        """Hook for logic at end of processing."""
-        pass
-
-    def pre_handler(self, cmd):
-        """Hook for logic before each handler starts."""
-        pass
-
-    def post_handler(self, cmd):
-        """Hook for logic after each handler finishes."""
-        pass
-
-    def progress_handler(self, cmd):
-        """Process a ProgressCommand."""
-        raise NotImplementedError(self.progress_handler)
-
-    def blob_handler(self, cmd):
-        """Process a BlobCommand."""
-        raise NotImplementedError(self.blob_handler)
-
-    def checkpoint_handler(self, cmd):
-        """Process a CheckpointCommand."""
-        raise NotImplementedError(self.checkpoint_handler)
-
-    def commit_handler(self, cmd):
-        """Process a CommitCommand."""
-        raise NotImplementedError(self.commit_handler)
-
-    def reset_handler(self, cmd):
-        """Process a ResetCommand."""
-        raise NotImplementedError(self.reset_handler)
-
-    def tag_handler(self, cmd):
-        """Process a TagCommand."""
-        raise NotImplementedError(self.tag_handler)
-
-
-class CommitHandler(object):
-    """Base class for commit handling.
-    
-    Subclasses should override the pre_*, post_* and *_handler
-    methods as appropriate.
-    """
-
-    def __init__(self, command):
-        self.command = command
-
-    def process(self):
-        self.pre_process_files()
-        for fc in self.command.file_iter():
-            #print fc.dump_str(verbose=True)
-            try:
-                handler = self.__class__.__dict__[fc.name[4:] + "_handler"]
-            except KeyError:
-                raise errors.MissingHandler(fc.name)
-            else:
-                handler(self, fc)
-        self.post_process_files()
-
-    def pre_process_files(self):
-        """Prepare for committing."""
-        pass
-
-    def post_process_files(self):
-        """Save the revision."""
-        pass
-
-    def modify_handler(self, filecmd):
-        """Handle a filemodify command."""
-        raise NotImplementedError(self.modify_handler)
-
-    def delete_handler(self, filecmd):
-        """Handle a filedelete command."""
-        raise NotImplementedError(self.delete_handler)
-
-    def copy_handler(self, filecmd):
-        """Handle a filecopy command."""
-        raise NotImplementedError(self.copy_handler)
-
-    def rename_handler(self, filecmd):
-        """Handle a filerename command."""
-        raise NotImplementedError(self.rename_handler)
-
-    def deleteall_handler(self, filecmd):
-        """Handle a filedeleteall command."""
-        raise NotImplementedError(self.deleteall_handler)