changeset 33:31b9a5805f02

Rename the extension's package from 'fastimport' to 'hgfastimport'.
author Greg Ward <greg-hg@gerg.ca>
date Tue, 05 May 2009 10:27:27 -0400
parents 8a92919bcd16
children 08e2157aaa9a
files README.txt fastimport/__init__.py fastimport/commands.py fastimport/dates.py fastimport/errors.py fastimport/hgechoprocessor.py fastimport/hgimport.py fastimport/parser.py fastimport/processor.py hgfastimport/__init__.py hgfastimport/commands.py hgfastimport/dates.py hgfastimport/errors.py hgfastimport/hgechoprocessor.py hgfastimport/hgimport.py hgfastimport/parser.py hgfastimport/processor.py tests/fastimport-common
diffstat 18 files changed, 1463 insertions(+), 1463 deletions(-) [+]
line wrap: on
line diff
--- a/README.txt	Mon May 04 19:38:20 2009 -0400
+++ b/README.txt	Tue May 05 10:27:27 2009 -0400
@@ -28,7 +28,7 @@
 
 To use hg-fastimport, add a line like
 
-  fastimport = /path/to/hg-fastimport/fastimport
+  fastimport = /path/to/hg-fastimport/hgfastimport
 
 to the [extensions] section of your hgrc.
 
--- a/fastimport/__init__.py	Mon May 04 19:38:20 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-from mercurial import commands
-
-import parser
-import hgechoprocessor
-import hgimport
-
-def fastimport(ui, repo, *sources, **opts):
-    proc = hgimport.HgImportProcessor(ui, repo, **opts)
-    #proc = hgechoprocessor.HgEchoProcessor(ui, repo, **opts)
-    proc.setup()
-    try:
-        for source in sources:
-            ui.write("Reading source: %s\n" % source)
-            f = open(source)
-            p = parser.ImportParser(f)
-            proc._process(p.iter_commands)
-            f.close()
-    finally:
-        proc.teardown()
-
-cmdtable = {
-    "fastimport":
-        (fastimport,
-         [],
-         'hg fastimport SOURCE ...')
-}
--- a/fastimport/commands.py	Mon May 04 19:38:20 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,216 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Import command classes."""
-
-
-# Lists of command names
-COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag']
-FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
-    'filedeleteall']
-
-# Bazaar file kinds
-FILE_KIND = 'file'
-SYMLINK_KIND = 'symlink'
-
-
-class ImportCommand(object):
-    """Base class for import commands."""
-
-    def __init__(self, name):
-        self.name = name
-        # List of field names not to display
-        self._binary = []
-
-    def __repr__(self):
-        return "<%s at %x: %s>" % (self.__class__.__name__, id(self), self)
-
-    def __str__(self):
-        return self.name
-
-    def dump_str(self, names=None, child_lists=None, verbose=False):
-        """Dump fields as a string.
-
-        :param names: the list of fields to include or
-            None for all public fields
-        :param child_lists: dictionary of child command names to
-            fields for that child command to include
-        :param verbose: if True, prefix each line with the command class and
-            display fields as a dictionary; if False, dump just the field
-            values with tabs between them
-        """
-        interesting = {}
-        if names is None:
-            fields = [k for k in self.__dict__.keys() if not k.startswith('_')]
-        else:
-            fields = names
-        for field in fields:
-            value = self.__dict__.get(field)
-            if field in self._binary and value is not None:
-                value = '(...)'
-            interesting[field] = value
-        if verbose:
-            return "%s: %s" % (self.__class__.__name__, interesting)
-        else:
-            return "\t".join([str(interesting[k]) for k in fields])
-
-
-class BlobCommand(ImportCommand):
-
-    def __init__(self, mark, data, lineno=0):
-        ImportCommand.__init__(self, 'blob')
-        self.mark = mark
-        self.data = data
-        self.lineno = lineno
-        # Provide a unique id in case the mark is missing
-        if mark is None:
-            self.id = '@%d' % lineno
-        else:
-            self.id = ':' + mark
-        self._binary = ['data']
-
-    def __str__(self):
-        return self.id
-
-
-class CheckpointCommand(ImportCommand):
-
-    def __init__(self):
-        ImportCommand.__init__(self, 'checkpoint')
-
-
-class CommitCommand(ImportCommand):
-
-    def __init__(self, ref, mark, author, committer, message, from_,
-        parents, file_iter, lineno=0):
-        ImportCommand.__init__(self, 'commit')
-        self.ref = ref
-        self.mark = mark
-        self.author = author
-        self.committer = committer
-        self.message = message
-        self.from_ = from_
-        self.parents = parents
-        self.file_iter = file_iter
-        self.lineno = lineno
-        self._binary = ['file_iter']
-        # Provide a unique id in case the mark is missing
-        if mark is None:
-            self.id = '@%d' % lineno
-        else:
-            self.id = ':' + mark
-
-    def __str__(self):
-        return "ref %s, mark %s" % (self.ref, self.mark)
-
-    def dump_str(self, names=None, child_lists=None, verbose=False):
-        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
-        for f in self.file_iter():
-            if child_lists is None:
-                continue
-            try:
-                child_names = child_lists[f.name]
-            except KeyError:
-                continue
-            result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
-        return '\n'.join(result)
-
-
-class ProgressCommand(ImportCommand):
-
-    def __init__(self, message):
-        ImportCommand.__init__(self, 'progress')
-        self.message = message
-
-
-class ResetCommand(ImportCommand):
-
-    def __init__(self, ref, from_):
-        ImportCommand.__init__(self, 'reset')
-        self.ref = ref
-        self.from_ = from_
-
-
-class TagCommand(ImportCommand):
-
-    def __init__(self, id, from_, tagger, message):
-        ImportCommand.__init__(self, 'tag')
-        self.id = id
-        self.from_ = from_
-        self.tagger = tagger
-        self.message = message
-
-    def __str__(self):
-        return self.id
-
-
-class FileCommand(ImportCommand):
-    """Base class for file commands."""
-    pass
-
-
-class FileModifyCommand(FileCommand):
-
-    def __init__(self, path, kind, is_executable, dataref, data):
-        # Either dataref or data should be null
-        FileCommand.__init__(self, 'filemodify')
-        self.path = path
-        self.kind = kind
-        self.is_executable = is_executable
-        self.dataref = dataref
-        self.data = data
-        self._binary = ['data']
-
-    def __str__(self):
-        return self.path
-
-
-class FileDeleteCommand(FileCommand):
-
-    def __init__(self, path):
-        FileCommand.__init__(self, 'filedelete')
-        self.path = path
-
-    def __str__(self):
-        return self.path
-
-
-class FileCopyCommand(FileCommand):
-
-    def __init__(self, src_path, dest_path):
-        FileCommand.__init__(self, 'filecopy')
-        self.src_path = src_path
-        self.dest_path = dest_path
-
-    def __str__(self):
-        return "%s -> %s" % (self.src_path, self.dest_path)
-
-
-class FileRenameCommand(FileCommand):
-
-    def __init__(self, old_path, new_path):
-        FileCommand.__init__(self, 'filerename')
-        self.old_path = old_path
-        self.new_path = new_path
-
-    def __str__(self):
-        return "%s -> %s" % (self.old_path, self.new_path)
-
-
-class FileDeleteAllCommand(FileCommand):
-
-    def __init__(self):
-        FileCommand.__init__(self, 'filedeleteall')
--- a/fastimport/dates.py	Mon May 04 19:38:20 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Date parsing routines.
-
-Each routine returns timestamp,timezone where
-
-* timestamp is seconds since epoch
-* timezone is the offset from UTC in seconds.
-"""
-
-
-import time
-
-
-def parse_raw(s):
-    """Parse a date from a raw string.
-    
-    The format must be exactly "seconds-since-epoch offset-utc".
-    See the spec for details.
-    """
-    timestamp_str, timezone_str = s.split(' ', 1)
-    timestamp = float(timestamp_str)
-    timezone = _parse_tz(timezone_str)
-    return timestamp, timezone
-
-
-def _parse_tz(tz):
-    """Parse a timezone specification in the [+|-]HHMM format.
-
-    :return: the timezone offset in seconds.
-    """
-    # from git_repository.py in bzr-git
-    assert len(tz) == 5
-    sign = {'+': +1, '-': -1}[tz[0]]
-    hours = int(tz[1:3])
-    minutes = int(tz[3:])
-    return sign * 60 * (60 * hours + minutes)
-
-
-def parse_rfc2822(s):
-    """Parse a date from a rfc2822 string.
-    
-    See the spec for details.
-    """
-    raise NotImplementedError(parse_rfc2822)
-
-
-def parse_now(s):
-    """Parse a date from a string.
-
-    The format must be exactly "now".
-    See the spec for details.
-    """
-    return time.time(), 0
-
-
-# Lookup tabel of date parsing routines
-DATE_PARSERS_BY_NAME = {
-    'raw':      parse_raw,
-    'rfc2822':  parse_rfc2822,
-    'now':      parse_now,
-    }
--- a/fastimport/errors.py	Mon May 04 19:38:20 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Exception classes for fastimport"""
-
-#from bzrlib import errors as bzr_errors
-
-
-# Prefix to messages to show location information
-_LOCATION_FMT = "line %(lineno)d: "
-
-class FmtException(StandardError):
-    def __str__(self):
-        return repr(self)
-
-    def __repr__(self):
-        return self._fmt % self.__dict__
-
-class ImportError(FmtException):
-    """The base exception class for all import processing exceptions."""
-
-    _fmt = "Unknown Import Error"
-
-
-class ParsingError(ImportError):
-    """The base exception class for all import processing exceptions."""
-
-    _fmt = _LOCATION_FMT + "Unknown Import Parsing Error"
-
-    def __init__(self, lineno):
-        ImportError.__init__(self)
-        self.lineno = lineno
-
-
-class MissingBytes(ParsingError):
-    """Raised when EOF encountered while expecting to find more bytes."""
-
-    _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes,"
-        " found %(found)d")
-
-    def __init__(self, lineno, expected, found):
-        ParsingError.__init__(self, lineno)
-        self.expected = expected
-        self.found = found
-
-
-class MissingTerminator(ParsingError):
-    """Raised when EOF encountered while expecting to find a terminator."""
-
-    _fmt = (_LOCATION_FMT +
-        "Unexpected EOF - expected '%(terminator)s' terminator")
-
-    def __init__(self, lineno, terminator):
-        ParsingError.__init__(self, lineno)
-        self.terminator = terminator
-
-
-class InvalidCommand(ParsingError):
-    """Raised when an unknown command found."""
-
-    _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'")
-
-    def __init__(self, lineno, cmd):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-
-
-class MissingSection(ParsingError):
-    """Raised when a section is required in a command but not present."""
-
-    _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s")
-
-    def __init__(self, lineno, cmd, section):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-        self.section = section
-
-
-class BadFormat(ParsingError):
-    """Raised when a section is formatted incorrectly."""
-
-    _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in "
-        "command %(cmd)s: found '%(text)s'")
-
-    def __init__(self, lineno, cmd, section, text):
-        ParsingError.__init__(self, lineno)
-        self.cmd = cmd
-        self.section = section
-        self.text = text
-
-
-class InvalidTimezone(ParsingError):
-    """Raised when converting a string timezone to a seconds offset."""
-
-    _fmt = (_LOCATION_FMT +
-        "Timezone %(timezone)r could not be converted.%(reason)s")
-
-    def __init__(self, lineno, timezone, reason=None):
-        ParsingError.__init__(self, lineno)
-        self.timezone = timezone
-        if reason:
-            self.reason = ' ' + reason
-        else:
-            self.reason = ''
-
-
-class UnknownDateFormat(ImportError):
-    """Raised when an unknown date format is given."""
-
-    _fmt = ("Unknown date format '%(format)s'")
-
-    def __init__(self, format):
-        ImportError.__init__(self)
-        self.format = format
-
-
-class MissingHandler(ImportError):
-    """Raised when a processor can't handle a command."""
-
-    _fmt = ("Missing handler for command %(cmd)s")
-
-    def __init__(self, cmd):
-        ImportError.__init__(self)
-        self.cmd = cmd
-
-
-class UnknownParameter(ImportError):
-    """Raised when an unknown parameter is passed to a processor."""
-
-    _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")
-
-    def __init__(self, param, knowns):
-        ImportError.__init__(self)
-        self.param = param
-        self.knowns = knowns
-
-
-class BadRepositorySize(ImportError):
-    """Raised when the repository has an incorrect number of revisions."""
-
-    _fmt = ("Bad repository size - %(found)d revisions found, "
-        "%(expected)d expected")
-
-    def __init__(self, expected, found):
-        ImportError.__init__(self)
-        self.expected = expected
-        self.found = found
-
-
-class BadRestart(ImportError):
-    """Raised when the import stream and id-map do not match up."""
-
-    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
-        "but matching revision-id is unknown")
-
-    def __init__(self, commit_id):
-        ImportError.__init__(self)
-        self.commit_id = commit_id
--- a/fastimport/hgechoprocessor.py	Mon May 04 19:38:20 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Processor of import commands.
-
-This module provides core processing functionality including an abstract class
-for basing real processors on. See the processors package for examples.
-"""
-
-
-import processor
-
-class HgEchoProcessor(processor.ImportProcessor):
-    
-    def __init__(self, ui, repo, **opts):
-        self.ui = ui
-        self.repo = repo
-        self.opts = opts
-        self.finished = False
-        
-    def progress_handler(self, cmd):
-        self.ui.write(cmd.dump_str(verbose=True) + "\n")
-
-    def blob_handler(self, cmd):
-        self.ui.write(cmd.dump_str(verbose=True) + "\n")
-
-    def checkpoint_handler(self, cmd):
-        self.ui.write(cmd.dump_str(verbose=True) + "\n")
-
-    def commit_handler(self, cmd):
-        commit_handler = HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts)
-        commit_handler.process()
-        self.ui.write(cmd.dump_str(verbose=True) + "\n")
-
-    def reset_handler(self, cmd):
-        self.ui.write(cmd.dump_str(verbose=True) + "\n")
-
-    def tag_handler(self, cmd):
-        self.ui.write(cmd.dump_str(verbose=True) + "\n")
-
-class HgEchoCommitHandler(processor.CommitHandler):
-
-    def __init__(self, command, ui, repo, **opts):
-        self.command = command
-        self.ui = ui
-        self.repo = repo
-        self.opts = opts
-
-    def modify_handler(self, filecmd):
-        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
-
-    def delete_handler(self, filecmd):
-        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
-
-    def copy_handler(self, filecmd):
-        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
-
-    def rename_handler(self, filecmd):
-        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
-
-    def deleteall_handler(self, filecmd):
-        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
--- a/fastimport/hgimport.py	Mon May 04 19:38:20 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,227 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Processor of import commands.
-
-This module provides core processing functionality including an abstract class
-for basing real processors on. See the processors package for examples.
-"""
-
-import os
-import os.path
-import errno
-import shutil
-
-import mercurial.hg
-import mercurial.commands
-from mercurial import util
-from mercurial.node import nullrev
-import processor
-
-import hgechoprocessor
-
-class HgImportProcessor(processor.ImportProcessor):
-    
-    def __init__(self, ui, repo, **opts):
-        self.ui = ui
-        self.repo = repo
-        self.opts = opts
-        self.last_mark = None
-        self.mark_map = {}
-        self.branch_map = {}
-        #self.tag_map = {}
-        #self.tag_back_map = {}
-        self.finished = False
-
-        self.numblobs = 0               # for progress reporting
-        self.blobdir = None
-
-    def teardown(self):
-        if self.blobdir and os.path.exists(self.blobdir):
-            self.ui.status("Removing blob dir %r ...\n" % self.blobdir)
-            shutil.rmtree(self.blobdir)
-
-    def progress_handler(self, cmd):
-        self.ui.write("Progress: %s\n" % cmd.message)
-
-    def blob_handler(self, cmd):
-        if self.blobdir is None:        # no blobs seen yet
-            # XXX cleanup?
-            self.blobdir = os.path.join(self.repo.root, ".hg", "blobs")
-            os.mkdir(self.blobdir)
-
-        fn = self.getblobfilename(cmd.id)
-        blobfile = open(fn, "wb")
-        #self.ui.debug("writing blob %s to %s (%d bytes)\n"
-        #              % (cmd.id, fn, len(cmd.data)))
-        blobfile.write(cmd.data)
-        blobfile.close()
-
-        self.numblobs += 1
-        if self.numblobs % 500 == 0:
-            self.ui.status("%d blobs read\n" % self.numblobs)
-
-    def getblobfilename(self, blobid):
-        if self.blobdir is None:
-            raise RuntimeError("no blobs seen, so no blob directory created")
-        # XXX should escape ":" for windows
-        return os.path.join(self.blobdir, "blob-" + blobid)
-
-    def checkpoint_handler(self, cmd):
-        # This command means nothing to us
-        pass
-
-    def committish_rev(self, committish):
-        if committish.startswith(":"):
-            return self.mark_map[committish]
-        else:
-            return self.branch_map[committish]
-        
-    def commit_handler(self, cmd):
-        if cmd.ref == "refs/heads/TAG.FIXUP":
-            #self.tag_back_map[cmd.mark] == first_parent
-            commit_handler = hgechoprocessor.HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts)
-            commit_handler.process()
-            return
-        if cmd.from_:
-            first_parent = self.committish_rev(cmd.from_)
-        else:
-            first_parent = self.branch_map.get(cmd.ref, nullrev)
-        #self.ui.write("First parent: %s\n" % first_parent)
-        # Update to the first parent
-        mercurial.hg.clean(self.repo, self.repo.lookup(first_parent))
-        #self.ui.write("Bing\n")
-        if cmd.parents:
-            #self.ui.write("foo")
-            if len(cmd.parents) > 1:
-                raise NotImplementedError("Can't handle more than two parents")
-            second_parent = self.committish_rev(cmd.parents[0])
-            #self.ui.write("Second parent: %s\n" % second_parent)
-            mercurial.commands.debugsetparents(self.ui, self.repo, 
-                first_parent, second_parent)
-        #self.ui.write("Bing\n")
-        if cmd.ref == "refs/heads/master":
-            branch = "default"
-        else:
-            branch = cmd.ref[len("refs/heads/"):]
-        #self.ui.write("Branch: %s\n" % branch)
-        self.repo.dirstate.setbranch(branch)
-        #self.ui.write("Bing\n")
-        #print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv"
-        commit_handler = HgImportCommitHandler(
-            self, cmd, self.ui, self.repo, **self.opts)
-        commit_handler.process()
-        #print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
-        #self.ui.write(cmd.dump_str(verbose=True))
-
-        # in case we are converting from git or bzr, prefer author but
-        # fallback to committer (committer is required, author is
-        # optional)
-        userinfo = cmd.author or cmd.committer
-        user = "%s <%s>" % (userinfo[0], userinfo[1])
-        node = self.repo.rawcommit(files = commit_handler.filelist(),
-            text = cmd.message,
-            user = user,
-            date = self.convert_date(userinfo))
-        rev = self.repo.changelog.rev(node)
-        if cmd.mark is not None:
-            self.mark_map[":" + cmd.mark] = rev
-        self.branch_map[cmd.ref] = rev
-        self.ui.write("Done commit of rev %d\n" % rev)
-        #self.ui.write("%s\n" % self.mark_map)
-
-    def convert_date(self, c):
-        res = (int(c[2]), int(c[3]))
-        #print c, res
-        #print type((0, 0)), type(res), len(res), type(res) is type((0, 0))
-        #if type(res) is type((0, 0)) and len(res) == 2:
-        #    print "go for it"
-        #return res
-        return "%d %d" % res
-        
-    def reset_handler(self, cmd):
-        if cmd.from_ is not None:
-            self.branch_map[cmd.ref] = self.committish_rev(cmd.from_)
-
-    def tag_handler(self, cmd):
-        # self.tag_map[cmd.id] = self.tag_back_map[cmd.from_]
-        pass
-
-class HgImportCommitHandler(processor.CommitHandler):
-
-    def __init__(self, parent, command, ui, repo, **opts):
-        self.parent = parent            # HgImportProcessor running the show
-        self.command = command
-        self.ui = ui
-        self.repo = repo
-        self.opts = opts
-        self.files = set()
-
-    def _make_container(self, path):
-        if '/' in path:
-            d = os.path.dirname(path)
-            if not os.path.isdir(d):
-                os.makedirs(d)
-        
-    def modify_handler(self, filecmd):
-        #print "============================" + filecmd.path
-        # FIXME: handle mode
-        self.files.add(filecmd.path)
-        fullpath = os.path.join(self.repo.root, filecmd.path)
-        self._make_container(fullpath)
-        #print "made dirs, writing file"
-        if filecmd.dataref:
-            # reference to a blob that has already appeared in the stream
-            fn = self.parent.getblobfilename(filecmd.dataref)
-            if os.path.exists(fullpath):
-                os.remove(fullpath)
-            try:
-                os.link(fn, fullpath)
-            except OSError, err:
-                if err.errno == errno.ENOENT:
-                    # if this happens, it's a problem in the fast-import
-                    # stream
-                    raise util.Abort("bad blob ref %r (no such file %s)"
-                                     % (filecmd.dataref, fn))
-                else:
-                    # anything else is a bug in this extension
-                    # (cross-device move, permissions, etc.)
-                    raise
-        elif filecmd.data:
-            f = open(fullpath, "w")
-            f.write(filecmd.data)
-            f.close()
-        else:
-            raise RuntimeError("either filecmd.dataref or filecmd.data must be set")
-        #print self.repo.add([filecmd.path])
-        #print "Done:", filecmd.path
-
-    def delete_handler(self, filecmd):
-        self.files.add(filecmd.path)
-        self.repo.remove([filecmd.path], unlink=True)
-
-    #def copy_handler(self, filecmd):
-    #    self.files.add(filecmd.path)
-    #    """Handle a filecopy command."""
-    #    self.ui.write("Cmd: %s\n" % repr(filecmd))
-
-    #def rename_handler(self, filecmd):
-    #    self.files.add(filecmd.path)
-    #    """Handle a filerename command."""
-    #    self.ui.write("Cmd: %s\n" % repr(filecmd))
-
-    def filelist(self):
-        return list(self.files)
--- a/fastimport/parser.py	Mon May 04 19:38:20 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,507 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Parser of import data into command objects.
-
-In order to reuse existing front-ends, the stream format is a subset of
-the one used by git-fast-import (as of the 1.5.4 release of git at least).
-The grammar is:
-
-  stream ::= cmd*;
-
-  cmd ::= new_blob
-        | new_commit
-        | new_tag
-        | reset_branch
-        | checkpoint
-        | progress
-        ;
-
-  new_blob ::= 'blob' lf
-    mark?
-    file_content;
-  file_content ::= data;
-
-  new_commit ::= 'commit' sp ref_str lf
-    mark?
-    ('author' sp name '<' email '>' when lf)?
-    'committer' sp name '<' email '>' when lf
-    commit_msg
-    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
-    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
-    file_change*
-    lf?;
-  commit_msg ::= data;
-
-  file_change ::= file_clr
-    | file_del
-    | file_rnm
-    | file_cpy
-    | file_obm
-    | file_inm;
-  file_clr ::= 'deleteall' lf;
-  file_del ::= 'D' sp path_str lf;
-  file_rnm ::= 'R' sp path_str sp path_str lf;
-  file_cpy ::= 'C' sp path_str sp path_str lf;
-  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
-  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
-    data;
-
-  new_tag ::= 'tag' sp tag_str lf
-    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
-    'tagger' sp name '<' email '>' when lf
-    tag_msg;
-  tag_msg ::= data;
-
-  reset_branch ::= 'reset' sp ref_str lf
-    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
-    lf?;
-
-  checkpoint ::= 'checkpoint' lf
-    lf?;
-
-  progress ::= 'progress' sp not_lf* lf
-    lf?;
-
-     # note: the first idnum in a stream should be 1 and subsequent
-     # idnums should not have gaps between values as this will cause
-     # the stream parser to reserve space for the gapped values.  An
-     # idnum can be updated in the future to a new object by issuing
-     # a new mark directive with the old idnum.
-     #
-  mark ::= 'mark' sp idnum lf;
-  data ::= (delimited_data | exact_data)
-    lf?;
-
-    # note: delim may be any string but must not contain lf.
-    # data_line may contain any data but must not be exactly
-    # delim.
-  delimited_data ::= 'data' sp '<<' delim lf
-    (data_line lf)*
-    delim lf;
-
-     # note: declen indicates the length of binary_data in bytes.
-     # declen does not include the lf preceeding the binary data.
-     #
-  exact_data ::= 'data' sp declen lf
-    binary_data;
-
-     # note: quoted strings are C-style quoting supporting \c for
-     # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
-     # is the signed byte value in octal.  Note that the only
-     # characters which must actually be escaped to protect the
-     # stream formatting is: \, \" and LF.  Otherwise these values
-     # are UTF8.
-     #
-  ref_str     ::= ref;
-  sha1exp_str ::= sha1exp;
-  tag_str     ::= tag;
-  path_str    ::= path    | '"' quoted(path)    '"' ;
-  mode        ::= '100644' | '644'
-                | '100755' | '755'
-                | '120000'
-                ;
-
-  declen ::= # unsigned 32 bit value, ascii base10 notation;
-  bigint ::= # unsigned integer value, ascii base10 notation;
-  binary_data ::= # file content, not interpreted;
-
-  when         ::= raw_when | rfc2822_when;
-  raw_when     ::= ts sp tz;
-  rfc2822_when ::= # Valid RFC 2822 date and time;
-
-  sp ::= # ASCII space character;
-  lf ::= # ASCII newline (LF) character;
-
-     # note: a colon (':') must precede the numerical value assigned to
-     # an idnum.  This is to distinguish it from a ref or tag name as
-     # GIT does not permit ':' in ref or tag strings.
-     #
-  idnum   ::= ':' bigint;
-  path    ::= # GIT style file path, e.g. "a/b/c";
-  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
-  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
-  sha1exp ::= # Any valid GIT SHA1 expression;
-  hexsha1 ::= # SHA1 in hexadecimal format;
-
-     # note: name and email are UTF8 strings, however name must not
-     # contain '<' or lf and email must not contain any of the
-     # following: '<', '>', lf.
-     #
-  name  ::= # valid GIT author/committer name;
-  email ::= # valid GIT author/committer email;
-  ts    ::= # time since the epoch in seconds, ascii base10 notation;
-  tz    ::= # GIT style timezone;
-
-     # note: comments may appear anywhere in the input, except
-     # within a data command.  Any form of the data command
-     # always escapes the related input from comment processing.
-     #
-     # In case it is not clear, the '#' that starts the comment
-     # must be the first character on that the line (an lf have
-     # preceeded it).
-     #
-  comment ::= '#' not_lf* lf;
-  not_lf  ::= # Any byte that is not ASCII newline (LF);
-"""
-
-
-import re
-import sys
-
-import commands
-import dates
-import errors
-
-
-## Stream parsing ##
-
-class LineBasedParser(object):
-
-    def __init__(self, input):
-        """A Parser that keeps track of line numbers.
-
-        :param input: the file-like object to read from
-        """
-        self.input = input
-        self.lineno = 0
-        # Lines pushed back onto the input stream
-        self._buffer = []
-
-    def abort(self, exception, *args):
-        """Raise an exception providing line number information."""
-        raise exception(self.lineno, *args)
-
-    def readline(self):
-        """Get the next line including the newline or '' on EOF."""
-        self.lineno += 1
-        if self._buffer:
-            return self._buffer.pop()
-        else:
-            return self.input.readline()
-
-    def next_line(self):
-        """Get the next line without the newline or None on EOF."""
-        line = self.readline()
-        if line:
-            return line[:-1]
-        else:
-            return None
-
-    def push_line(self, line):
-        """Push line back onto the line buffer.
-        
-        :param line: the line with no trailing newline
-        """
-        self.lineno -= 1
-        self._buffer.append(line + "\n")
-
-    def read_bytes(self, count):
-        """Read a given number of bytes from the input stream.
-        
-        Throws MissingBytes if the bytes are not found.
-
-        Note: This method does not read from the line buffer.
-
-        :return: a string
-        """
-        lines = []
-        left = count
-        found = 0
-        while left > 0:
-            line = self.input.readline(left)
-            if line:
-                line_len = len(line)
-                left -= line_len
-                found += line_len
-                lines.append(line)
-                if line.endswith('\n'):
-                    self.lineno += 1
-            else:
-                left = 0
-        if found != count:
-            self.abort(errors.MissingBytes, count, found)
-        return ''.join(lines)
-
-    def read_until(self, terminator):
-        """Read the input stream until the terminator is found.
-        
-        Throws MissingTerminator if the terminator is not found.
-
-        Note: This method does not read from the line buffer.
-
-        :return: the bytes read up to but excluding the terminator.
-        """
-        raise NotImplementedError(self.read_until)
-
-
-# Regular expression used for parsing. (Note: The spec states that the name
-# part should be non-empty, but git-fast-export doesn't always do that.)
-_WHO_AND_WHEN_RE = re.compile(r'([^\<\n]+) <([^\>\n]+)> (.+)')
-
-
-class ImportParser(LineBasedParser):
-
-    def __init__(self, input, verbose=False, output=sys.stdout):
-        """A Parser of import commands.
-
-        :param input: the file-like object to read from
-        :param verbose: display extra information of not
-        :param output: the file-like object to write messages to (YAGNI?)
-        """
-        LineBasedParser.__init__(self, input)
-        self.verbose = verbose
-        self.output = output
-        # We auto-detect the date format when a date is first encountered
-        self.date_parser = None
-
-    def iter_commands(self):
-        """Iterator returning ImportCommand objects."""
-        while True:
-            line = self.next_line()
-            if line is None:
-                break
-            elif len(line) == 0 or line.startswith('#'):
-                continue
-            # Search for commands in order of likelihood
-            elif line.startswith('commit '):
-                yield self._parse_commit(line[len('commit '):])
-            elif line.startswith('blob'):
-                yield self._parse_blob()
-            elif line.startswith('progress '):
-                yield commands.ProgressCommand(line[len('progress '):])
-            elif line.startswith('reset '):
-                yield self._parse_reset(line[len('reset '):])
-            elif line.startswith('tag '):
-                yield self._parse_tag(line[len('tag '):])
-            elif line.startswith('checkpoint'):
-                yield commands.CheckpointCommand()
-            else:
-                print line
-                self.abort(errors.InvalidCommand, line)
-
-    def iter_file_commands(self):
-        """Iterator returning FileCommand objects.
-        
-        If an invalid file command is found, the line is silently
-        pushed back and iteration ends.
-        """
-        while True:
-            line = self.next_line()
-            if line is None:
-                break
-            elif len(line) == 0 or line.startswith('#'):
-                continue
-            # Search for file commands in order of likelihood
-            elif line.startswith('M '):
-                yield self._parse_file_modify(line[2:])
-            elif line.startswith('D '):
-                path = self._path(line[2:])
-                yield commands.FileDeleteCommand(path)
-            elif line.startswith('R '):
-                old, new = self._path_pair(line[2:])
-                yield commands.FileRenameCommand(old, new)
-            elif line.startswith('C '):
-                src, dest = self._path_pair(line[2:])
-                yield commands.FileRenameCommand(src, dest)
-            elif line.startswith('deleteall'):
-                yield commands.FileDeleteAllCommand()
-            else:
-                self.push_line(line)
-                break
-
-    def _parse_blob(self):
-        """Parse a blob command."""
-        lineno = self.lineno
-        mark = self._get_mark_if_any()
-        data = self._get_data('blob')
-        return commands.BlobCommand(mark, data, lineno)
-
-    def _parse_commit(self, ref):
-        """Parse a commit command."""
-        lineno  = self.lineno
-        mark = self._get_mark_if_any()
-        author = self._get_user_info('commit', 'author', False)
-        committer = self._get_user_info('commit', 'committer')
-        message = self._get_data('commit', 'message')
-        from_ = self._get_from()
-        parents = []
-        while True:
-            merge = self._get_merge()
-            if merge is not None:
-                parents.append(merge)
-            else:
-                break
-        return commands.CommitCommand(ref, mark, author, committer, message, from_,
-            parents, self.iter_file_commands, lineno)
-
-    def _parse_file_modify(self, info):
-        """Parse a filemodify command within a commit.
-
-        :param info: a string in the format "mode dataref path"
-          (where dataref might be the hard-coded literal 'inline').
-        """
-        params = info.split(' ', 2)
-        path = self._path(params[2])
-        is_executable, is_symlink = self._mode(params[0])
-        if is_symlink:
-            kind = commands.SYMLINK_KIND
-        else:
-            kind = commands.FILE_KIND
-        if params[1] == 'inline':
-            dataref = None
-            data = self._get_data('filemodify')
-        else:
-            dataref = params[1]
-            data = None
-        return commands.FileModifyCommand(path, kind, is_executable, dataref,
-            data)
-
-    def _parse_reset(self, ref):
-        """Parse a reset command."""
-        from_ = self._get_from()
-        return commands.ResetCommand(ref, from_)
-
-    def _parse_tag(self, name):
-        """Parse a tag command."""
-        from_ = self._get_from('tag')
-        tagger = self._get_user_info('tag', 'tagger')
-        message = self._get_data('tag', 'message')
-        return commands.TagCommand(name, from_, tagger, message)
-
-    def _get_mark_if_any(self):
-        """Parse a mark section."""
-        line = self.next_line()
-        if line.startswith('mark :'):
-            return line[len('mark :'):]
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_from(self, required_for=None):
-        """Parse a from section."""
-        line = self.next_line()
-        if line.startswith('from '):
-            return line[len('from '):]
-        elif required_for:
-            self.abort(errors.MissingSection, required_for, 'from')
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_merge(self):
-        """Parse a merge section."""
-        line = self.next_line()
-        if line is None:                # EOF after last "merge" line
-            return None
-        elif line.startswith('merge '):
-            return line[len('merge '):]
-        else:
-            #print "not a merge:", line
-            self.push_line(line)
-            return None
-
-    def _get_user_info(self, cmd, section, required=True):
-        """Parse a user section."""
-        line = self.next_line()
-        if line.startswith(section + ' '):
-            return self._who_when(line[len(section + ' '):], cmd, section)
-        elif required:
-            self.abort(errors.MissingSection, cmd, section)
-        else:
-            self.push_line(line)
-            return None
-
-    def _get_data(self, required_for, section='data'):
-        """Parse a data section."""
-        line = self.next_line()
-        if line.startswith('data '):
-            rest = line[len('data '):]
-            if rest.startswith('<<'):
-                return self.read_until(rest[2:])
-            else:
-                size = int(rest)
-                result = self.read_bytes(size)
-                # optional LF after data.
-                next = self.input.readline()
-                self.lineno += 1
-                if len(next) > 1 or next != "\n":
-                    self.push_line(next[:-1])
-                return result
-        else:
-            self.abort(errors.MissingSection, required_for, section)
-
-    def _who_when(self, s, cmd, section):
-        """Parse who and when information from a string.
-        
-        :return: a tuple of (name,email,timestamp,timezone)
-        """
-        match = _WHO_AND_WHEN_RE.search(s)
-        if match:
-            datestr = match.group(3)
-            if self.date_parser is None:
-                # auto-detect the date format
-                if len(datestr.split(' ')) == 2:
-                    format = 'raw'
-                elif datestr == 'now':
-                    format = 'now'
-                else:
-                    format = 'rfc2822'
-                self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
-            when = self.date_parser(datestr)
-            return (match.group(1),match.group(2),when[0],when[1])
-        else:
-            self.abort(errors.BadFormat, cmd, section, s)
-
-    def _path(self, s):
-        """Parse a path."""
-        if s.startswith('"'):
-            if s[-1] != '"':
-                self.abort(errors.BadFormat)
-            else:
-                s = _unquote_c_string(s[1:-1])
-        # Check path for sanity
-        sp = s.split("/")
-        if "" in sp or ".." in sp:
-            self.abort(errors.BadFormat)
-        return s
-
-    def _path_pair(self, s):
-        """Parse two paths separated by a space."""
-        # TODO: handle a space in the first path
-        parts = s.split(' ', 1)
-        return map(_unquote_c_string, parts)
-
-    def _mode(self, s):
-        """Parse a file mode into executable and symlink flags.
-        
-        :return (is_executable, is_symlink)
-        """
-        # Note: Output from git-fast-export slightly different to spec
-        if s in ['644', '100644', '0100644']:
-            return False, False
-        elif s in ['755', '100755', '0100755']:
-            return True, False
-        elif s in ['120000', '0120000']:
-            return False, True
-        else:
-            self.abort(errors.BadFormat, 'filemodify', 'mode', s)
-
-
-def _unquote_c_string(s):
-    """replace C-style escape sequences (\n, \", etc.) with real chars."""
-    # HACK: Python strings are close enough
-    return s.decode('string_escape', 'replace')
--- a/fastimport/processor.py	Mon May 04 19:38:20 2009 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,163 +0,0 @@
-# Copyright (C) 2008 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Processor of import commands.
-
-This module provides core processing functionality including an abstract class
-for basing real processors on. See the processors package for examples.
-"""
-
-
-#from bzrlib.errors import NotBranchError
-import errors
-
-
-class ImportProcessor(object):
-    """Base class for import processors.
-    
-    Subclasses should override the pre_*, post_* and *_handler
-    methods as appropriate.
-    """
-    
-    # XXX this is useless now that we process multiple input streams:
-    # we only want to call setup() and teardown() once for all of them!
-    def process(self, command_iter):
-        """Process the stream of commands.
-
-        :param command_iter: an iterator providing commands
-        """
-        raise RuntimeError("hey! who's calling this?!?")
-        self.setup()
-        try:
-            self._process(command_iter)
-        finally:
-            self.teardown()
-
-    def _process(self, command_iter):
-        self.pre_process()
-        for cmd in command_iter():
-            #print cmd.dump_str(verbose=True)
-            #print "starting"
-            try:
-                #print cmd.name
-                handler = self.__class__.__dict__[cmd.name + "_handler"]
-            except KeyError:
-                raise errors.MissingHandler(cmd.name)
-            else:
-                self.pre_handler(cmd)
-                handler(self, cmd)
-                self.post_handler(cmd)
-            if self.finished:
-                break
-            #print "around again"
-        self.post_process()
-
-    def setup(self):
-        pass
-    
-    def teardown(self):
-        pass
-        
-    def pre_process(self):
-        """Hook for logic at start of processing."""
-        pass
-
-    def post_process(self):
-        """Hook for logic at end of processing."""
-        pass
-
-    def pre_handler(self, cmd):
-        """Hook for logic before each handler starts."""
-        pass
-
-    def post_handler(self, cmd):
-        """Hook for logic after each handler finishes."""
-        pass
-
-    def progress_handler(self, cmd):
-        """Process a ProgressCommand."""
-        raise NotImplementedError(self.progress_handler)
-
-    def blob_handler(self, cmd):
-        """Process a BlobCommand."""
-        raise NotImplementedError(self.blob_handler)
-
-    def checkpoint_handler(self, cmd):
-        """Process a CheckpointCommand."""
-        raise NotImplementedError(self.checkpoint_handler)
-
-    def commit_handler(self, cmd):
-        """Process a CommitCommand."""
-        raise NotImplementedError(self.commit_handler)
-
-    def reset_handler(self, cmd):
-        """Process a ResetCommand."""
-        raise NotImplementedError(self.reset_handler)
-
-    def tag_handler(self, cmd):
-        """Process a TagCommand."""
-        raise NotImplementedError(self.tag_handler)
-
-
-class CommitHandler(object):
-    """Base class for commit handling.
-    
-    Subclasses should override the pre_*, post_* and *_handler
-    methods as appropriate.
-    """
-
-    def __init__(self, command):
-        self.command = command
-
-    def process(self):
-        self.pre_process_files()
-        for fc in self.command.file_iter():
-            #print fc.dump_str(verbose=True)
-            try:
-                handler = self.__class__.__dict__[fc.name[4:] + "_handler"]
-            except KeyError:
-                raise errors.MissingHandler(fc.name)
-            else:
-                handler(self, fc)
-        self.post_process_files()
-
-    def pre_process_files(self):
-        """Prepare for committing."""
-        pass
-
-    def post_process_files(self):
-        """Save the revision."""
-        pass
-
-    def modify_handler(self, filecmd):
-        """Handle a filemodify command."""
-        raise NotImplementedError(self.modify_handler)
-
-    def delete_handler(self, filecmd):
-        """Handle a filedelete command."""
-        raise NotImplementedError(self.delete_handler)
-
-    def copy_handler(self, filecmd):
-        """Handle a filecopy command."""
-        raise NotImplementedError(self.copy_handler)
-
-    def rename_handler(self, filecmd):
-        """Handle a filerename command."""
-        raise NotImplementedError(self.rename_handler)
-
-    def deleteall_handler(self, filecmd):
-        """Handle a filedeleteall command."""
-        raise NotImplementedError(self.deleteall_handler)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgfastimport/__init__.py	Tue May 05 10:27:27 2009 -0400
@@ -0,0 +1,26 @@
+from mercurial import commands
+
+import parser
+import hgechoprocessor
+import hgimport
+
+def fastimport(ui, repo, *sources, **opts):
+    proc = hgimport.HgImportProcessor(ui, repo, **opts)
+    #proc = hgechoprocessor.HgEchoProcessor(ui, repo, **opts)
+    proc.setup()
+    try:
+        for source in sources:
+            ui.write("Reading source: %s\n" % source)
+            f = open(source)
+            p = parser.ImportParser(f)
+            proc._process(p.iter_commands)
+            f.close()
+    finally:
+        proc.teardown()
+
+cmdtable = {
+    "fastimport":
+        (fastimport,
+         [],
+         'hg fastimport SOURCE ...')
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgfastimport/commands.py	Tue May 05 10:27:27 2009 -0400
@@ -0,0 +1,216 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Import command classes."""
+
+
+# Lists of command names
+COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag']
+FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
+    'filedeleteall']
+
+# Bazaar file kinds
+FILE_KIND = 'file'
+SYMLINK_KIND = 'symlink'
+
+
+class ImportCommand(object):
+    """Base class for import commands."""
+
+    def __init__(self, name):
+        self.name = name
+        # List of field names not to display
+        self._binary = []
+
+    def __repr__(self):
+        return "<%s at %x: %s>" % (self.__class__.__name__, id(self), self)
+
+    def __str__(self):
+        return self.name
+
+    def dump_str(self, names=None, child_lists=None, verbose=False):
+        """Dump fields as a string.
+
+        :param names: the list of fields to include or
+            None for all public fields
+        :param child_lists: dictionary of child command names to
+            fields for that child command to include
+        :param verbose: if True, prefix each line with the command class and
+            display fields as a dictionary; if False, dump just the field
+            values with tabs between them
+        """
+        interesting = {}
+        if names is None:
+            fields = [k for k in self.__dict__.keys() if not k.startswith('_')]
+        else:
+            fields = names
+        for field in fields:
+            value = self.__dict__.get(field)
+            if field in self._binary and value is not None:
+                value = '(...)'
+            interesting[field] = value
+        if verbose:
+            return "%s: %s" % (self.__class__.__name__, interesting)
+        else:
+            return "\t".join([str(interesting[k]) for k in fields])
+
+
+class BlobCommand(ImportCommand):
+
+    def __init__(self, mark, data, lineno=0):
+        ImportCommand.__init__(self, 'blob')
+        self.mark = mark
+        self.data = data
+        self.lineno = lineno
+        # Provide a unique id in case the mark is missing
+        if mark is None:
+            self.id = '@%d' % lineno
+        else:
+            self.id = ':' + mark
+        self._binary = ['data']
+
+    def __str__(self):
+        return self.id
+
+
+class CheckpointCommand(ImportCommand):
+
+    def __init__(self):
+        ImportCommand.__init__(self, 'checkpoint')
+
+
+class CommitCommand(ImportCommand):
+
+    def __init__(self, ref, mark, author, committer, message, from_,
+        parents, file_iter, lineno=0):
+        ImportCommand.__init__(self, 'commit')
+        self.ref = ref
+        self.mark = mark
+        self.author = author
+        self.committer = committer
+        self.message = message
+        self.from_ = from_
+        self.parents = parents
+        self.file_iter = file_iter
+        self.lineno = lineno
+        self._binary = ['file_iter']
+        # Provide a unique id in case the mark is missing
+        if mark is None:
+            self.id = '@%d' % lineno
+        else:
+            self.id = ':' + mark
+
+    def __str__(self):
+        return "ref %s, mark %s" % (self.ref, self.mark)
+
+    def dump_str(self, names=None, child_lists=None, verbose=False):
+        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
+        for f in self.file_iter():
+            if child_lists is None:
+                continue
+            try:
+                child_names = child_lists[f.name]
+            except KeyError:
+                continue
+            result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
+        return '\n'.join(result)
+
+
+class ProgressCommand(ImportCommand):
+
+    def __init__(self, message):
+        ImportCommand.__init__(self, 'progress')
+        self.message = message
+
+
+class ResetCommand(ImportCommand):
+
+    def __init__(self, ref, from_):
+        ImportCommand.__init__(self, 'reset')
+        self.ref = ref
+        self.from_ = from_
+
+
+class TagCommand(ImportCommand):
+
+    def __init__(self, id, from_, tagger, message):
+        ImportCommand.__init__(self, 'tag')
+        self.id = id
+        self.from_ = from_
+        self.tagger = tagger
+        self.message = message
+
+    def __str__(self):
+        return self.id
+
+
+class FileCommand(ImportCommand):
+    """Base class for file commands."""
+    pass
+
+
+class FileModifyCommand(FileCommand):
+
+    def __init__(self, path, kind, is_executable, dataref, data):
+        # Exactly one of dataref and data is expected to be set; the other is None
+        FileCommand.__init__(self, 'filemodify')
+        self.path = path
+        self.kind = kind
+        self.is_executable = is_executable
+        self.dataref = dataref
+        self.data = data
+        self._binary = ['data']
+
+    def __str__(self):
+        return self.path
+
+
+class FileDeleteCommand(FileCommand):
+
+    def __init__(self, path):
+        FileCommand.__init__(self, 'filedelete')
+        self.path = path
+
+    def __str__(self):
+        return self.path
+
+
+class FileCopyCommand(FileCommand):
+
+    def __init__(self, src_path, dest_path):
+        FileCommand.__init__(self, 'filecopy')
+        self.src_path = src_path
+        self.dest_path = dest_path
+
+    def __str__(self):
+        return "%s -> %s" % (self.src_path, self.dest_path)
+
+
+class FileRenameCommand(FileCommand):
+
+    def __init__(self, old_path, new_path):
+        FileCommand.__init__(self, 'filerename')
+        self.old_path = old_path
+        self.new_path = new_path
+
+    def __str__(self):
+        return "%s -> %s" % (self.old_path, self.new_path)
+
+
+class FileDeleteAllCommand(FileCommand):
+
+    def __init__(self):
+        FileCommand.__init__(self, 'filedeleteall')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgfastimport/dates.py	Tue May 05 10:27:27 2009 -0400
@@ -0,0 +1,76 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Date parsing routines.
+
+Each routine returns timestamp,timezone where
+
+* timestamp is seconds since epoch
+* timezone is the offset from UTC in seconds.
+"""
+
+
+import time
+
+
+def parse_raw(s):
+    """Parse a date from a raw string.
+    
+    The format must be exactly "seconds-since-epoch offset-utc".
+    See the spec for details.
+    """
+    timestamp_str, timezone_str = s.split(' ', 1)
+    timestamp = float(timestamp_str)
+    timezone = _parse_tz(timezone_str)
+    return timestamp, timezone
+
+
+def _parse_tz(tz):
+    """Parse a timezone specification in the [+|-]HHMM format.
+
+    :return: the timezone offset in seconds.
+    """
+    # from git_repository.py in bzr-git
+    assert len(tz) == 5
+    sign = {'+': +1, '-': -1}[tz[0]]
+    hours = int(tz[1:3])
+    minutes = int(tz[3:])
+    return sign * 60 * (60 * hours + minutes)
+
+
+def parse_rfc2822(s):
+    """Parse a date from a rfc2822 string.
+    
+    See the spec for details.
+    """
+    raise NotImplementedError(parse_rfc2822)
+
+
+def parse_now(s):
+    """Parse a date from a string.
+
+    The format must be exactly "now".
+    See the spec for details.
+    """
+    return time.time(), 0
+
+
+# Lookup table of date parsing routines
+DATE_PARSERS_BY_NAME = {
+    'raw':      parse_raw,
+    'rfc2822':  parse_rfc2822,
+    'now':      parse_now,
+    }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgfastimport/errors.py	Tue May 05 10:27:27 2009 -0400
@@ -0,0 +1,171 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Exception classes for fastimport"""
+
+#from bzrlib import errors as bzr_errors
+
+
+# Prefix to messages to show location information
+_LOCATION_FMT = "line %(lineno)d: "
+
+class FmtException(StandardError):
+    def __str__(self):
+        return repr(self)
+
+    def __repr__(self):
+        return self._fmt % self.__dict__
+
+class ImportError(FmtException):
+    """The base exception class for all import processing exceptions."""
+
+    _fmt = "Unknown Import Error"
+
+
+class ParsingError(ImportError):
+    """The base exception class for all import parsing exceptions."""
+
+    _fmt = _LOCATION_FMT + "Unknown Import Parsing Error"
+
+    def __init__(self, lineno):
+        ImportError.__init__(self)
+        self.lineno = lineno
+
+
+class MissingBytes(ParsingError):
+    """Raised when EOF encountered while expecting to find more bytes."""
+
+    _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes,"
+        " found %(found)d")
+
+    def __init__(self, lineno, expected, found):
+        ParsingError.__init__(self, lineno)
+        self.expected = expected
+        self.found = found
+
+
+class MissingTerminator(ParsingError):
+    """Raised when EOF encountered while expecting to find a terminator."""
+
+    _fmt = (_LOCATION_FMT +
+        "Unexpected EOF - expected '%(terminator)s' terminator")
+
+    def __init__(self, lineno, terminator):
+        ParsingError.__init__(self, lineno)
+        self.terminator = terminator
+
+
+class InvalidCommand(ParsingError):
+    """Raised when an unknown command found."""
+
+    _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'")
+
+    def __init__(self, lineno, cmd):
+        ParsingError.__init__(self, lineno)
+        self.cmd = cmd
+
+
+class MissingSection(ParsingError):
+    """Raised when a section is required in a command but not present."""
+
+    _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s")
+
+    def __init__(self, lineno, cmd, section):
+        ParsingError.__init__(self, lineno)
+        self.cmd = cmd
+        self.section = section
+
+
+class BadFormat(ParsingError):
+    """Raised when a section is formatted incorrectly."""
+
+    _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in "
+        "command %(cmd)s: found '%(text)s'")
+
+    def __init__(self, lineno, cmd, section, text):
+        ParsingError.__init__(self, lineno)
+        self.cmd = cmd
+        self.section = section
+        self.text = text
+
+
+class InvalidTimezone(ParsingError):
+    """Raised when converting a string timezone to a seconds offset."""
+
+    _fmt = (_LOCATION_FMT +
+        "Timezone %(timezone)r could not be converted.%(reason)s")
+
+    def __init__(self, lineno, timezone, reason=None):
+        ParsingError.__init__(self, lineno)
+        self.timezone = timezone
+        if reason:
+            self.reason = ' ' + reason
+        else:
+            self.reason = ''
+
+
+class UnknownDateFormat(ImportError):
+    """Raised when an unknown date format is given."""
+
+    _fmt = ("Unknown date format '%(format)s'")
+
+    def __init__(self, format):
+        ImportError.__init__(self)
+        self.format = format
+
+
+class MissingHandler(ImportError):
+    """Raised when a processor can't handle a command."""
+
+    _fmt = ("Missing handler for command %(cmd)s")
+
+    def __init__(self, cmd):
+        ImportError.__init__(self)
+        self.cmd = cmd
+
+
+class UnknownParameter(ImportError):
+    """Raised when an unknown parameter is passed to a processor."""
+
+    _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")
+
+    def __init__(self, param, knowns):
+        ImportError.__init__(self)
+        self.param = param
+        self.knowns = knowns
+
+
+class BadRepositorySize(ImportError):
+    """Raised when the repository has an incorrect number of revisions."""
+
+    _fmt = ("Bad repository size - %(found)d revisions found, "
+        "%(expected)d expected")
+
+    def __init__(self, expected, found):
+        ImportError.__init__(self)
+        self.expected = expected
+        self.found = found
+
+
+class BadRestart(ImportError):
+    """Raised when the import stream and id-map do not match up."""
+
+    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
+        "but matching revision-id is unknown")
+
+    def __init__(self, commit_id):
+        ImportError.__init__(self)
+        self.commit_id = commit_id
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgfastimport/hgechoprocessor.py	Tue May 05 10:27:27 2009 -0400
@@ -0,0 +1,75 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Processor of import commands.
+
+This module provides HgEchoProcessor and HgEchoCommitHandler, which write
+each command's verbose dump_str() output to the ui.
+"""
+
+
+import processor
+
+class HgEchoProcessor(processor.ImportProcessor):
+    
+    def __init__(self, ui, repo, **opts):
+        self.ui = ui
+        self.repo = repo
+        self.opts = opts
+        self.finished = False
+        
+    def progress_handler(self, cmd):
+        self.ui.write(cmd.dump_str(verbose=True) + "\n")
+
+    def blob_handler(self, cmd):
+        self.ui.write(cmd.dump_str(verbose=True) + "\n")
+
+    def checkpoint_handler(self, cmd):
+        self.ui.write(cmd.dump_str(verbose=True) + "\n")
+
+    def commit_handler(self, cmd):
+        commit_handler = HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts)
+        commit_handler.process()
+        self.ui.write(cmd.dump_str(verbose=True) + "\n")
+
+    def reset_handler(self, cmd):
+        self.ui.write(cmd.dump_str(verbose=True) + "\n")
+
+    def tag_handler(self, cmd):
+        self.ui.write(cmd.dump_str(verbose=True) + "\n")
+
+class HgEchoCommitHandler(processor.CommitHandler):
+
+    def __init__(self, command, ui, repo, **opts):
+        self.command = command
+        self.ui = ui
+        self.repo = repo
+        self.opts = opts
+
+    def modify_handler(self, filecmd):
+        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
+
+    def delete_handler(self, filecmd):
+        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
+
+    def copy_handler(self, filecmd):
+        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
+
+    def rename_handler(self, filecmd):
+        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
+
+    def deleteall_handler(self, filecmd):
+        self.ui.write(filecmd.dump_str(verbose=True) + "\n")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgfastimport/hgimport.py	Tue May 05 10:27:27 2009 -0400
@@ -0,0 +1,227 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Processor of import commands.
+
+This module provides HgImportProcessor and HgImportCommitHandler, which
+turn the commands of a fast-import stream into Mercurial commits.
+"""
+
+import os
+import os.path
+import errno
+import shutil
+
+import mercurial.hg
+import mercurial.commands
+from mercurial import util
+from mercurial.node import nullrev
+import processor
+
+import hgechoprocessor
+
+class HgImportProcessor(processor.ImportProcessor):
+    
+    def __init__(self, ui, repo, **opts):
+        self.ui = ui
+        self.repo = repo
+        self.opts = opts
+        self.last_mark = None
+        self.mark_map = {}
+        self.branch_map = {}
+        #self.tag_map = {}
+        #self.tag_back_map = {}
+        self.finished = False
+
+        self.numblobs = 0               # for progress reporting
+        self.blobdir = None
+
+    def teardown(self):
+        if self.blobdir and os.path.exists(self.blobdir):
+            self.ui.status("Removing blob dir %r ...\n" % self.blobdir)
+            shutil.rmtree(self.blobdir)
+
+    def progress_handler(self, cmd):
+        self.ui.write("Progress: %s\n" % cmd.message)
+
+    def blob_handler(self, cmd):
+        if self.blobdir is None:        # no blobs seen yet
+            # XXX cleanup?
+            self.blobdir = os.path.join(self.repo.root, ".hg", "blobs")
+            os.mkdir(self.blobdir)
+
+        fn = self.getblobfilename(cmd.id)
+        blobfile = open(fn, "wb")
+        #self.ui.debug("writing blob %s to %s (%d bytes)\n"
+        #              % (cmd.id, fn, len(cmd.data)))
+        blobfile.write(cmd.data)
+        blobfile.close()
+
+        self.numblobs += 1
+        if self.numblobs % 500 == 0:
+            self.ui.status("%d blobs read\n" % self.numblobs)
+
+    def getblobfilename(self, blobid):
+        if self.blobdir is None:
+            raise RuntimeError("no blobs seen, so no blob directory created")
+        # XXX should escape ":" for windows
+        return os.path.join(self.blobdir, "blob-" + blobid)
+
+    def checkpoint_handler(self, cmd):
+        # This command means nothing to us
+        pass
+
+    def committish_rev(self, committish):
+        if committish.startswith(":"):
+            return self.mark_map[committish]
+        else:
+            return self.branch_map[committish]
+        
+    def commit_handler(self, cmd):
+        if cmd.ref == "refs/heads/TAG.FIXUP":
+            #self.tag_back_map[cmd.mark] == first_parent
+            commit_handler = hgechoprocessor.HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts)
+            commit_handler.process()
+            return
+        if cmd.from_:
+            first_parent = self.committish_rev(cmd.from_)
+        else:
+            first_parent = self.branch_map.get(cmd.ref, nullrev)
+        #self.ui.write("First parent: %s\n" % first_parent)
+        # Update to the first parent
+        mercurial.hg.clean(self.repo, self.repo.lookup(first_parent))
+        #self.ui.write("Bing\n")
+        if cmd.parents:
+            #self.ui.write("foo")
+            if len(cmd.parents) > 1:
+                raise NotImplementedError("Can't handle more than two parents")
+            second_parent = self.committish_rev(cmd.parents[0])
+            #self.ui.write("Second parent: %s\n" % second_parent)
+            mercurial.commands.debugsetparents(self.ui, self.repo, 
+                first_parent, second_parent)
+        #self.ui.write("Bing\n")
+        if cmd.ref == "refs/heads/master":
+            branch = "default"
+        else:
+            branch = cmd.ref[len("refs/heads/"):]
+        #self.ui.write("Branch: %s\n" % branch)
+        self.repo.dirstate.setbranch(branch)
+        #self.ui.write("Bing\n")
+        #print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv"
+        commit_handler = HgImportCommitHandler(
+            self, cmd, self.ui, self.repo, **self.opts)
+        commit_handler.process()
+        #print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
+        #self.ui.write(cmd.dump_str(verbose=True))
+
+        # in case we are converting from git or bzr, prefer author but
+        # fallback to committer (committer is required, author is
+        # optional)
+        userinfo = cmd.author or cmd.committer
+        user = "%s <%s>" % (userinfo[0], userinfo[1])
+        node = self.repo.rawcommit(files = commit_handler.filelist(),
+            text = cmd.message,
+            user = user,
+            date = self.convert_date(userinfo))
+        rev = self.repo.changelog.rev(node)
+        if cmd.mark is not None:
+            self.mark_map[":" + cmd.mark] = rev
+        self.branch_map[cmd.ref] = rev
+        self.ui.write("Done commit of rev %d\n" % rev)
+        #self.ui.write("%s\n" % self.mark_map)
+
+    def convert_date(self, c):
+        res = (int(c[2]), int(c[3]))
+        #print c, res
+        #print type((0, 0)), type(res), len(res), type(res) is type((0, 0))
+        #if type(res) is type((0, 0)) and len(res) == 2:
+        #    print "go for it"
+        #return res
+        return "%d %d" % res
+        
+    def reset_handler(self, cmd):
+        if cmd.from_ is not None:
+            self.branch_map[cmd.ref] = self.committish_rev(cmd.from_)
+
+    def tag_handler(self, cmd):
+        # self.tag_map[cmd.id] = self.tag_back_map[cmd.from_]
+        pass
+
+class HgImportCommitHandler(processor.CommitHandler):
+
+    def __init__(self, parent, command, ui, repo, **opts):
+        self.parent = parent            # HgImportProcessor running the show
+        self.command = command
+        self.ui = ui
+        self.repo = repo
+        self.opts = opts
+        self.files = set()
+
+    def _make_container(self, path):
+        if '/' in path:
+            d = os.path.dirname(path)
+            if not os.path.isdir(d):
+                os.makedirs(d)
+        
+    def modify_handler(self, filecmd):
+        #print "============================" + filecmd.path
+        # FIXME: handle mode
+        self.files.add(filecmd.path)
+        fullpath = os.path.join(self.repo.root, filecmd.path)
+        self._make_container(fullpath)
+        #print "made dirs, writing file"
+        if filecmd.dataref:
+            # reference to a blob that has already appeared in the stream
+            fn = self.parent.getblobfilename(filecmd.dataref)
+            if os.path.exists(fullpath):
+                os.remove(fullpath)
+            try:
+                os.link(fn, fullpath)
+            except OSError, err:
+                if err.errno == errno.ENOENT:
+                    # if this happens, it's a problem in the fast-import
+                    # stream
+                    raise util.Abort("bad blob ref %r (no such file %s)"
+                                     % (filecmd.dataref, fn))
+                else:
+                    # anything else is a bug in this extension
+                    # (cross-device move, permissions, etc.)
+                    raise
+        elif filecmd.data:
+            f = open(fullpath, "w")
+            f.write(filecmd.data)
+            f.close()
+        else:
+            raise RuntimeError("either filecmd.dataref or filecmd.data must be set")
+        #print self.repo.add([filecmd.path])
+        #print "Done:", filecmd.path
+
+    def delete_handler(self, filecmd):
+        self.files.add(filecmd.path)
+        self.repo.remove([filecmd.path], unlink=True)
+
+    #def copy_handler(self, filecmd):
+    #    self.files.add(filecmd.path)
+    #    """Handle a filecopy command."""
+    #    self.ui.write("Cmd: %s\n" % repr(filecmd))
+
+    #def rename_handler(self, filecmd):
+    #    self.files.add(filecmd.path)
+    #    """Handle a filerename command."""
+    #    self.ui.write("Cmd: %s\n" % repr(filecmd))
+
+    def filelist(self):
+        return list(self.files)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgfastimport/parser.py	Tue May 05 10:27:27 2009 -0400
@@ -0,0 +1,507 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Parser of import data into command objects.
+
+In order to reuse existing front-ends, the stream format is a subset of
+the one used by git-fast-import (as of the 1.5.4 release of git at least).
+The grammar is:
+
+  stream ::= cmd*;
+
+  cmd ::= new_blob
+        | new_commit
+        | new_tag
+        | reset_branch
+        | checkpoint
+        | progress
+        ;
+
+  new_blob ::= 'blob' lf
+    mark?
+    file_content;
+  file_content ::= data;
+
+  new_commit ::= 'commit' sp ref_str lf
+    mark?
+    ('author' sp name '<' email '>' when lf)?
+    'committer' sp name '<' email '>' when lf
+    commit_msg
+    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
+    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
+    file_change*
+    lf?;
+  commit_msg ::= data;
+
+  file_change ::= file_clr
+    | file_del
+    | file_rnm
+    | file_cpy
+    | file_obm
+    | file_inm;
+  file_clr ::= 'deleteall' lf;
+  file_del ::= 'D' sp path_str lf;
+  file_rnm ::= 'R' sp path_str sp path_str lf;
+  file_cpy ::= 'C' sp path_str sp path_str lf;
+  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
+  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
+    data;
+
+  new_tag ::= 'tag' sp tag_str lf
+    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
+    'tagger' sp name '<' email '>' when lf
+    tag_msg;
+  tag_msg ::= data;
+
+  reset_branch ::= 'reset' sp ref_str lf
+    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
+    lf?;
+
+  checkpoint ::= 'checkpoint' lf
+    lf?;
+
+  progress ::= 'progress' sp not_lf* lf
+    lf?;
+
+     # note: the first idnum in a stream should be 1 and subsequent
+     # idnums should not have gaps between values as this will cause
+     # the stream parser to reserve space for the gapped values.  An
+     # idnum can be updated in the future to a new object by issuing
+     # a new mark directive with the old idnum.
+     #
+  mark ::= 'mark' sp idnum lf;
+  data ::= (delimited_data | exact_data)
+    lf?;
+
+    # note: delim may be any string but must not contain lf.
+    # data_line may contain any data but must not be exactly
+    # delim.
+  delimited_data ::= 'data' sp '<<' delim lf
+    (data_line lf)*
+    delim lf;
+
+     # note: declen indicates the length of binary_data in bytes.
+     # declen does not include the lf preceding the binary data.
+     #
+  exact_data ::= 'data' sp declen lf
+    binary_data;
+
+     # note: quoted strings are C-style quoting supporting \c for
+     # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
+     # is the signed byte value in octal.  Note that the only
+     # characters which must actually be escaped to protect the
+     # stream formatting is: \, \" and LF.  Otherwise these values
+     # are UTF8.
+     #
+  ref_str     ::= ref;
+  sha1exp_str ::= sha1exp;
+  tag_str     ::= tag;
+  path_str    ::= path    | '"' quoted(path)    '"' ;
+  mode        ::= '100644' | '644'
+                | '100755' | '755'
+                | '120000'
+                ;
+
+  declen ::= # unsigned 32 bit value, ascii base10 notation;
+  bigint ::= # unsigned integer value, ascii base10 notation;
+  binary_data ::= # file content, not interpreted;
+
+  when         ::= raw_when | rfc2822_when;
+  raw_when     ::= ts sp tz;
+  rfc2822_when ::= # Valid RFC 2822 date and time;
+
+  sp ::= # ASCII space character;
+  lf ::= # ASCII newline (LF) character;
+
+     # note: a colon (':') must precede the numerical value assigned to
+     # an idnum.  This is to distinguish it from a ref or tag name as
+     # GIT does not permit ':' in ref or tag strings.
+     #
+  idnum   ::= ':' bigint;
+  path    ::= # GIT style file path, e.g. "a/b/c";
+  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
+  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
+  sha1exp ::= # Any valid GIT SHA1 expression;
+  hexsha1 ::= # SHA1 in hexadecimal format;
+
+     # note: name and email are UTF8 strings, however name must not
+     # contain '<' or lf and email must not contain any of the
+     # following: '<', '>', lf.
+     #
+  name  ::= # valid GIT author/committer name;
+  email ::= # valid GIT author/committer email;
+  ts    ::= # time since the epoch in seconds, ascii base10 notation;
+  tz    ::= # GIT style timezone;
+
+     # note: comments may appear anywhere in the input, except
+     # within a data command.  Any form of the data command
+     # always escapes the related input from comment processing.
+     #
+     # In case it is not clear, the '#' that starts the comment
+     # must be the first character on that line (an lf must have
+     # preceded it).
+     #
+  comment ::= '#' not_lf* lf;
+  not_lf  ::= # Any byte that is not ASCII newline (LF);
+"""
+
+
+import re
+import sys
+
+import commands
+import dates
+import errors
+
+
+## Stream parsing ##
+
+class LineBasedParser(object):
+
+    def __init__(self, input):
+        """A Parser that keeps track of line numbers.
+
+        :param input: the file-like object to read from
+        """
+        self.input = input
+        self.lineno = 0
+        # Lines pushed back onto the input stream
+        self._buffer = []
+
+    def abort(self, exception, *args):
+        """Raise an exception providing line number information."""
+        raise exception(self.lineno, *args)
+
+    def readline(self):
+        """Get the next line including the newline or '' on EOF."""
+        self.lineno += 1
+        if self._buffer:
+            return self._buffer.pop()
+        else:
+            return self.input.readline()
+
+    def next_line(self):
+        """Get the next line without the newline or None on EOF."""
+        line = self.readline()
+        if line:
+            return line[:-1]
+        else:
+            return None
+
+    def push_line(self, line):
+        """Push line back onto the line buffer.
+        
+        :param line: the line with no trailing newline
+        """
+        self.lineno -= 1
+        self._buffer.append(line + "\n")
+
+    def read_bytes(self, count):
+        """Read a given number of bytes from the input stream.
+        
+        Throws MissingBytes if the bytes are not found.
+
+        Note: This method does not read from the line buffer.
+
+        :return: a string
+        """
+        lines = []
+        left = count
+        found = 0
+        while left > 0:
+            line = self.input.readline(left)
+            if line:
+                line_len = len(line)
+                left -= line_len
+                found += line_len
+                lines.append(line)
+                if line.endswith('\n'):
+                    self.lineno += 1
+            else:
+                left = 0
+        if found != count:
+            self.abort(errors.MissingBytes, count, found)
+        return ''.join(lines)
+
+    def read_until(self, terminator):
+        """Read the input stream until the terminator is found.
+        
+        Throws MissingTerminator if the terminator is not found.
+
+        Note: This method does not read from the line buffer.
+
+        :return: the bytes read up to but excluding the terminator.
+        """
+        raise NotImplementedError(self.read_until)
+
+
+# Regular expression used for parsing. (Note: The spec states that the name
+# part should be non-empty, but git-fast-export doesn't always do that.)
+_WHO_AND_WHEN_RE = re.compile(r'([^\<\n]+) <([^\>\n]+)> (.+)')
+
+
+class ImportParser(LineBasedParser):
+
+    def __init__(self, input, verbose=False, output=sys.stdout):
+        """A Parser of import commands.
+
+        :param input: the file-like object to read from
+        :param verbose: display extra information or not
+        :param output: the file-like object to write messages to (YAGNI?)
+        """
+        LineBasedParser.__init__(self, input)
+        self.verbose = verbose
+        self.output = output
+        # We auto-detect the date format when a date is first encountered
+        self.date_parser = None
+
+    def iter_commands(self):
+        """Iterator returning ImportCommand objects."""
+        while True:
+            line = self.next_line()
+            if line is None:
+                break
+            elif len(line) == 0 or line.startswith('#'):
+                continue
+            # Search for commands in order of likelihood
+            elif line.startswith('commit '):
+                yield self._parse_commit(line[len('commit '):])
+            elif line.startswith('blob'):
+                yield self._parse_blob()
+            elif line.startswith('progress '):
+                yield commands.ProgressCommand(line[len('progress '):])
+            elif line.startswith('reset '):
+                yield self._parse_reset(line[len('reset '):])
+            elif line.startswith('tag '):
+                yield self._parse_tag(line[len('tag '):])
+            elif line.startswith('checkpoint'):
+                yield commands.CheckpointCommand()
+            else:
+                print line
+                self.abort(errors.InvalidCommand, line)
+
+    def iter_file_commands(self):
+        """Iterator returning FileCommand objects.
+        
+        If an invalid file command is found, the line is silently
+        pushed back and iteration ends.
+        """
+        while True:
+            line = self.next_line()
+            if line is None:
+                break
+            elif len(line) == 0 or line.startswith('#'):
+                continue
+            # Search for file commands in order of likelihood
+            elif line.startswith('M '):
+                yield self._parse_file_modify(line[2:])
+            elif line.startswith('D '):
+                path = self._path(line[2:])
+                yield commands.FileDeleteCommand(path)
+            elif line.startswith('R '):
+                old, new = self._path_pair(line[2:])
+                yield commands.FileRenameCommand(old, new)
+            elif line.startswith('C '):
+                src, dest = self._path_pair(line[2:])
+                yield commands.FileRenameCommand(src, dest)
+            elif line.startswith('deleteall'):
+                yield commands.FileDeleteAllCommand()
+            else:
+                self.push_line(line)
+                break
+
+    def _parse_blob(self):
+        """Parse a blob command."""
+        lineno = self.lineno
+        mark = self._get_mark_if_any()
+        data = self._get_data('blob')
+        return commands.BlobCommand(mark, data, lineno)
+
+    def _parse_commit(self, ref):
+        """Parse a commit command."""
+        lineno  = self.lineno
+        mark = self._get_mark_if_any()
+        author = self._get_user_info('commit', 'author', False)
+        committer = self._get_user_info('commit', 'committer')
+        message = self._get_data('commit', 'message')
+        from_ = self._get_from()
+        parents = []
+        while True:
+            merge = self._get_merge()
+            if merge is not None:
+                parents.append(merge)
+            else:
+                break
+        return commands.CommitCommand(ref, mark, author, committer, message, from_,
+            parents, self.iter_file_commands, lineno)
+
+    def _parse_file_modify(self, info):
+        """Parse a filemodify command within a commit.
+
+        :param info: a string in the format "mode dataref path"
+          (where dataref might be the hard-coded literal 'inline').
+        """
+        params = info.split(' ', 2)
+        path = self._path(params[2])
+        is_executable, is_symlink = self._mode(params[0])
+        if is_symlink:
+            kind = commands.SYMLINK_KIND
+        else:
+            kind = commands.FILE_KIND
+        if params[1] == 'inline':
+            dataref = None
+            data = self._get_data('filemodify')
+        else:
+            dataref = params[1]
+            data = None
+        return commands.FileModifyCommand(path, kind, is_executable, dataref,
+            data)
+
+    def _parse_reset(self, ref):
+        """Parse a reset command."""
+        from_ = self._get_from()
+        return commands.ResetCommand(ref, from_)
+
+    def _parse_tag(self, name):
+        """Parse a tag command."""
+        from_ = self._get_from('tag')
+        tagger = self._get_user_info('tag', 'tagger')
+        message = self._get_data('tag', 'message')
+        return commands.TagCommand(name, from_, tagger, message)
+
+    def _get_mark_if_any(self):
+        """Parse a mark section."""
+        line = self.next_line()
+        if line.startswith('mark :'):
+            return line[len('mark :'):]
+        else:
+            self.push_line(line)
+            return None
+
+    def _get_from(self, required_for=None):
+        """Parse a from section."""
+        line = self.next_line()
+        if line.startswith('from '):
+            return line[len('from '):]
+        elif required_for:
+            self.abort(errors.MissingSection, required_for, 'from')
+        else:
+            self.push_line(line)
+            return None
+
+    def _get_merge(self):
+        """Parse a merge section."""
+        line = self.next_line()
+        if line is None:                # EOF after last "merge" line
+            return None
+        elif line.startswith('merge '):
+            return line[len('merge '):]
+        else:
+            #print "not a merge:", line
+            self.push_line(line)
+            return None
+
+    def _get_user_info(self, cmd, section, required=True):
+        """Parse a user section."""
+        line = self.next_line()
+        if line.startswith(section + ' '):
+            return self._who_when(line[len(section + ' '):], cmd, section)
+        elif required:
+            self.abort(errors.MissingSection, cmd, section)
+        else:
+            self.push_line(line)
+            return None
+
+    def _get_data(self, required_for, section='data'):
+        """Parse a data section."""
+        line = self.next_line()
+        if line.startswith('data '):
+            rest = line[len('data '):]
+            if rest.startswith('<<'):
+                return self.read_until(rest[2:])
+            else:
+                size = int(rest)
+                result = self.read_bytes(size)
+                # optional LF after data.
+                next = self.input.readline()
+                self.lineno += 1
+                if len(next) > 1 or next != "\n":
+                    self.push_line(next[:-1])
+                return result
+        else:
+            self.abort(errors.MissingSection, required_for, section)
+
+    def _who_when(self, s, cmd, section):
+        """Parse who and when information from a string.
+        
+        :return: a tuple of (name,email,timestamp,timezone)
+        """
+        match = _WHO_AND_WHEN_RE.search(s)
+        if match:
+            datestr = match.group(3)
+            if self.date_parser is None:
+                # auto-detect the date format
+                if len(datestr.split(' ')) == 2:
+                    format = 'raw'
+                elif datestr == 'now':
+                    format = 'now'
+                else:
+                    format = 'rfc2822'
+                self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
+            when = self.date_parser(datestr)
+            return (match.group(1),match.group(2),when[0],when[1])
+        else:
+            self.abort(errors.BadFormat, cmd, section, s)
+
+    def _path(self, s):
+        """Parse a path."""
+        if s.startswith('"'):
+            if s[-1] != '"':
+                self.abort(errors.BadFormat)
+            else:
+                s = _unquote_c_string(s[1:-1])
+        # Check path for sanity
+        sp = s.split("/")
+        if "" in sp or ".." in sp:
+            self.abort(errors.BadFormat)
+        return s
+
+    def _path_pair(self, s):
+        """Parse two paths separated by a space."""
+        # TODO: handle a space in the first path
+        parts = s.split(' ', 1)
+        return map(_unquote_c_string, parts)
+
+    def _mode(self, s):
+        """Parse a file mode into executable and symlink flags.
+        
+        :return: (is_executable, is_symlink)
+        """
+        # Note: Output from git-fast-export slightly different to spec
+        if s in ['644', '100644', '0100644']:
+            return False, False
+        elif s in ['755', '100755', '0100755']:
+            return True, False
+        elif s in ['120000', '0120000']:
+            return False, True
+        else:
+            self.abort(errors.BadFormat, 'filemodify', 'mode', s)
+
+
+def _unquote_c_string(s):
+    """replace C-style escape sequences (\n, \", etc.) with real chars."""
+    # HACK: Python strings are close enough
+    return s.decode('string_escape', 'replace')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgfastimport/processor.py	Tue May 05 10:27:27 2009 -0400
@@ -0,0 +1,163 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Processor of import commands.
+
+This module provides the core processing machinery: abstract base classes on
+which real processors are built. See hgimport and hgechoprocessor for examples.
+"""
+
+
+#from bzrlib.errors import NotBranchError
+import errors
+
+
+class ImportProcessor(object):
+    """Base class for import processors.
+
+    Subclasses override the pre_*/post_* hooks as needed and define the
+    *_handler methods themselves (see _process for how they are found).
+    """
+
+    # Stop flag polled by _process(); subclasses may set it to True.
+    finished = False
+
+    # XXX this is useless now that we process multiple input streams:
+    # we only want to call setup() and teardown() once for all of them!
+    def process(self, command_iter):
+        """Disabled for now: raises RuntimeError before doing anything."""
+        raise RuntimeError("hey! who's calling this?!?")
+        self.setup()
+        try:
+            self._process(command_iter)
+        finally:
+            self.teardown()
+
+    def _process(self, command_iter):
+        """Feed each command yielded by command_iter() to its handler."""
+        self.pre_process()
+        for cmd in command_iter():
+            try:
+                # Own class __dict__ only: inherited handlers are not found.
+                handler = self.__class__.__dict__[cmd.name + "_handler"]
+            except KeyError:
+                raise errors.MissingHandler(cmd.name)
+            else:
+                self.pre_handler(cmd)
+                handler(self, cmd)
+                self.post_handler(cmd)
+            if self.finished:
+                break
+        self.post_process()
+
+    def setup(self):
+        """Hook for one-time preparation; does nothing by default."""
+        pass
+
+    def teardown(self):
+        """Hook for one-time cleanup; does nothing by default."""
+        pass
+
+    def pre_process(self):
+        """Hook for logic at start of processing."""
+        pass
+
+    def post_process(self):
+        """Hook for logic at end of processing."""
+        pass
+
+    def pre_handler(self, cmd):
+        """Hook for logic before each handler starts."""
+        pass
+
+    def post_handler(self, cmd):
+        """Hook for logic after each handler finishes."""
+        pass
+
+    def progress_handler(self, cmd):
+        """Process a ProgressCommand."""
+        raise NotImplementedError(self.progress_handler)
+
+    def blob_handler(self, cmd):
+        """Process a BlobCommand."""
+        raise NotImplementedError(self.blob_handler)
+
+    def checkpoint_handler(self, cmd):
+        """Process a CheckpointCommand."""
+        raise NotImplementedError(self.checkpoint_handler)
+
+    def commit_handler(self, cmd):
+        """Process a CommitCommand."""
+        raise NotImplementedError(self.commit_handler)
+
+    def reset_handler(self, cmd):
+        """Process a ResetCommand."""
+        raise NotImplementedError(self.reset_handler)
+
+    def tag_handler(self, cmd):
+        """Process a TagCommand."""
+        raise NotImplementedError(self.tag_handler)
+
+
+class CommitHandler(object):
+    """Base class for handling the file commands of one commit.
+
+    Wraps a single command object and dispatches whatever its file_iter()
+    yields; subclasses define the *_handler methods and override the hooks.
+    """
+
+    def __init__(self, command):
+        self.command = command
+
+    def process(self):
+        """Send every file command to the handler named after it."""
+        self.pre_process_files()
+        for filecmd in self.command.file_iter():
+            key = filecmd.name[4:] + "_handler"  # filemodify -> modify_handler
+            try:
+                handler = self.__class__.__dict__[key]
+            except KeyError:
+                raise errors.MissingHandler(filecmd.name)
+            handler(self, filecmd)
+        self.post_process_files()
+
+    def pre_process_files(self):
+        """Hook run before any file command: prepare for committing."""
+        pass
+
+    def post_process_files(self):
+        """Hook run after all file commands: save the revision."""
+        pass
+
+    def modify_handler(self, filecmd):
+        """Handle a filemodify command; not implemented in this base class."""
+        raise NotImplementedError(self.modify_handler)
+
+    def delete_handler(self, filecmd):
+        """Handle a filedelete command; not implemented in this base class."""
+        raise NotImplementedError(self.delete_handler)
+
+    def copy_handler(self, filecmd):
+        """Handle a filecopy command; not implemented in this base class."""
+        raise NotImplementedError(self.copy_handler)
+
+    def rename_handler(self, filecmd):
+        """Handle a filerename command; not implemented in this base class."""
+        raise NotImplementedError(self.rename_handler)
+
+    def deleteall_handler(self, filecmd):
+        """Handle a filedeleteall command; not implemented in this base class."""
+        raise NotImplementedError(self.deleteall_handler)
--- a/tests/fastimport-common	Mon May 04 19:38:20 2009 -0400
+++ b/tests/fastimport-common	Tue May 05 10:27:27 2009 -0400
@@ -3,5 +3,5 @@
 cat > $HGRCPATH <<EOF
 [extensions]
 graphlog =
-fastimport = $TESTDIR/../fastimport
+fastimport = $TESTDIR/../hgfastimport
 EOF