changeset 47:7ff36dc9f0b1

Massive rework to use infrastructure provided by convert extension. fastimport no longer stages changes in the repository's working copy; instead, it now works like any other convert source: the imported history is kept in memory (except for file contents) and then processed by the 'convert' extension.
author Greg Ward <greg-hg@gerg.ca>
date Sat, 16 May 2009 12:57:22 -0400
parents 93c2b1e832bd
children 1cf21a8c274b
files hgfastimport/__init__.py hgfastimport/hgimport.py tests/test-fastimport-anonbranch tests/test-fastimport-anonbranch.out tests/test-fastimport-cvs2git-fixup.out tests/test-fastimport-cvs2git.out tests/test-fastimport-empty.out tests/test-fastimport-git.out tests/test-fastimport-nonascii tests/test-fastimport-nonascii.out tests/test-fastimport-simple tests/test-fastimport-simple.out tests/test-fastimport-syntax.out
diffstat 13 files changed, 415 insertions(+), 291 deletions(-) [+]
line wrap: on
line diff
--- a/hgfastimport/__init__.py	Sun May 10 14:16:02 2009 -0400
+++ b/hgfastimport/__init__.py	Sat May 16 12:57:22 2009 -0400
@@ -1,22 +1,42 @@
-from mercurial import commands
+from mercurial import encoding
+from hgext.convert import convcmd, hg
 
 from fastimport import parser
-import hgechoprocessor
-import hgimport
+from hgfastimport.hgimport import fastimport_source
 
 def fastimport(ui, repo, *sources, **opts):
-    proc = hgimport.HgImportProcessor(ui, repo, **opts)
-    #proc = hgechoprocessor.HgEchoProcessor(ui, repo, **opts)
-    proc.setup()
-    try:
-        for source in sources:
-            ui.write("Reading source: %s\n" % source)
-            f = open(source)
-            p = parser.ImportParser(f)
-            proc.process(p.iter_commands)
-            f.close()
-    finally:
-        proc.teardown()
+    """Convert a git fastimport dump into Mercurial changesets.
+
+    Reads a series of SOURCE fastimport dumps and adds the resulting
+    changes to the current Mercurial repository.
+    """
+    # Would be nice to just call hgext.convert.convcmd.convert() and let
+    # it take care of things.  But syntax and semantics are just a
+    # little mismatched:
+    #   - fastimport takes multiple source paths (mainly because cvs2git
+    #     produces 2 dump files)
+    #   - fastimport's dest is implicitly the current repo
+    #
+    # So for the time being, I have copied bits of convert() over here.
+    # Boo, hiss.
+
+    # assume fastimport metadata (usernames, commit messages) are
+    # encoded UTF-8
+    convcmd.orig_encoding = encoding.encoding
+    encoding.encoding = 'UTF-8'
+
+    # sink is the current repo, src is the list of fastimport streams
+    destc = hg.mercurial_sink(ui, repo.root)
+    srcc = fastimport_source(ui, repo, sources)
+
+    # TEMP hack to keep old behaviour and minimize test churn
+    # (this should be an option to fastimport)
+    opts['datesort'] = True
+
+    # not implemented: filemap, revmapfile
+    revmapfile = destc.revmapfile()
+    c = convcmd.converter(ui, srcc, destc, revmapfile, opts)
+    c.convert()
 
 cmdtable = {
     "fastimport":
--- a/hgfastimport/hgimport.py	Sun May 10 14:16:02 2009 -0400
+++ b/hgfastimport/hgimport.py	Sat May 16 12:57:22 2009 -0400
@@ -21,29 +21,119 @@
 """
 
 import os
-import os.path
-import errno
 import shutil
 
-import mercurial.hg
-import mercurial.commands
-from mercurial import util
-from mercurial.node import nullrev, hex
+from hgext.convert import common
+
+from fastimport import processor, parser
+
+class fastimport_source(common.converter_source):
+    """Interface between the fastimport processor below and Mercurial's
+    normal conversion infrastructure.
+    """
+    def __init__(self, ui, repo, sources):
+        self.ui = ui
+        self.sources = sources
+        self.processor = HgImportProcessor(ui, repo)
+        self.parsed = False
+
+    # converter_source methods
+
+    def before(self):
+        self.processor.setup()
+
+    def after(self):
+        self.processor.teardown()
+
+    def getheads(self):
+        """Return a list of this repository's heads"""
+        self._parse()
+        allheads = []
+        for branchheads in self.processor.branchmap.values():
+            allheads.extend(branchheads)
+        return allheads
+
+    def getfile(self, name, fileid):
+        """Return file contents as a string. fileid is the identifier
+        returned by a previous call to getchanges().
+        """
+        if fileid is None:              # deleted file
+            raise IOError
+        return self.processor.getblob(fileid)
+
+    def getmode(self, name, fileid):
+        """Return file mode, e.g. '', 'x', or 'l'. fileid is the identifier
+        returned by a previous call to getchanges().
+        """
+        return self.processor.getmode(name, fileid)
+
+    def getchanges(self, commitid):
+        """Returns a tuple of (files, copies).
 
-from fastimport import processor
-from hgfastimport import hgechoprocessor
+        files is a sorted list of (filename, id) tuples for all files
+        changed between commitid and its first parent returned by
+        getcommit(). id is the source revision id of the file.
+
+        copies is a dictionary of dest: source
+        """
+        return (self.processor.modified[commitid],
+                self.processor.copies[commitid])
+
+    def getcommit(self, commitid):
+        """Return the commit object for commitid"""
+        if commitid is None:
+            return None
+        else:
+            return self.processor.commitmap[commitid]
+
+    def gettags(self):
+        """Return the tags as a dictionary of name: revision"""
+        return dict(self.processor.tags)
+    
+    def getchangedfiles(self, rev, i):
+        """Return the files changed by rev compared to parent[i].
+
+        i is an index selecting one of the parents of rev.  The return
+        value should be the list of files that are different in rev and
+        this parent.
+
+        If rev has no parents, i is None.
+
+        This function is only needed to support --filemap
+        """
+        raise NotImplementedError()
+
+    # private worker methods
+
+    def _parse(self):
+        if self.parsed:
+            return
+
+        for source in self.sources:
+            self.ui.debug("reading fastimport source: %s\n" % source)
+            f = open(source)
+            p = parser.ImportParser(f)
+            self.processor.process(p.iter_commands)
+            f.close()
+
+        self.parsed = True
 
 class HgImportProcessor(processor.ImportProcessor):
     
-    def __init__(self, ui, repo, **opts):
+    def __init__(self, ui, repo):
         super(HgImportProcessor, self).__init__()
         self.ui = ui
         self.repo = repo
-        self.opts = opts
-        self.last_commit = None         # CommitCommand object
-        self.mark_map = {}              # map mark (e.g. ":1") to revision number
-        self.branch_map = {}            # map git branch name to revision number
-        self.lightweight_tags = []      # list of (ref, mark) tuples
+
+        self.commitmap = {}             # map commit ID (":1") to commit object
+        self.branchmap = {}             # map branch name to list of heads
+
+        # see HgImportCommitHandler for details on these three
+        self.modified = {}              # map commit id to list of file mods
+        self.filemodes = {}             # map commit id to {filename: mode} map
+        self.copies = {}                # map commit id to dict of file copies
+
+        self.tags = []                  # list of (tag, mark) tuples
 
         self.numblobs = 0               # for progress reporting
         self.blobdir = None
@@ -54,13 +144,6 @@
 
     def teardown(self):
         """Cleanup after processing all streams."""
-        # Hmmm: this isn't really a cleanup step, it's a post-processing
-        # step.  But we currently have one processor per input
-        # stream... despite the fact that state like mark_map,
-        # branch_map, and lightweight_tags really should span input
-        # streams.
-        self.write_lightweight_tags()
-
         if self.blobdir and os.path.exists(self.blobdir):
             self.ui.status("Removing blob dir %r ...\n" % self.blobdir)
             shutil.rmtree(self.blobdir)
@@ -69,38 +152,83 @@
         self.ui.write("Progress: %s\n" % cmd.message)
 
     def blob_handler(self, cmd):
+        self.writeblob(cmd.id, cmd.data)
+
+    def _getblobfilename(self, blobid):
+        if self.blobdir is None:
+            raise RuntimeError("no blobs seen, so no blob directory created")
+        # XXX should escape ":" for windows
+        return os.path.join(self.blobdir, "blob-" + blobid)
+
+    def getblob(self, fileid):
+        (commitid, blobid) = fileid
+        f = open(self._getblobfilename(blobid), "rb")
+        try:
+            return f.read()
+        finally:
+            f.close()
+
+    def writeblob(self, blobid, data):
         if self.blobdir is None:        # no blobs seen yet
-            # XXX cleanup?
             self.blobdir = os.path.join(self.repo.root, ".hg", "blobs")
             os.mkdir(self.blobdir)
 
-        fn = self.getblobfilename(cmd.id)
+        fn = self._getblobfilename(blobid)
         blobfile = open(fn, "wb")
         #self.ui.debug("writing blob %s to %s (%d bytes)\n"
-        #              % (cmd.id, fn, len(cmd.data)))
-        blobfile.write(cmd.data)
+        #              % (blobid, fn, len(data)))
+        blobfile.write(data)
         blobfile.close()
 
         self.numblobs += 1
         if self.numblobs % 500 == 0:
             self.ui.status("%d blobs read\n" % self.numblobs)
 
-    def getblobfilename(self, blobid):
-        if self.blobdir is None:
-            raise RuntimeError("no blobs seen, so no blob directory created")
-        # XXX should escape ":" for windows
-        return os.path.join(self.blobdir, "blob-" + blobid)
+    def getmode(self, name, fileid):
+        (commitid, blobid) = fileid
+        return self.filemodes[commitid][name]
 
     def checkpoint_handler(self, cmd):
         # This command means nothing to us
         pass
 
-    def committish_rev(self, committish):
+    def _getcommit(self, committish):
+        """Given a mark reference or a branch name, return the
+        appropriate commit object.  Return None if committish is a
+        branch with no commits.  Raises KeyError if anything else is out
+        of whack.
+        """
         if committish.startswith(":"):
-            return self.mark_map[committish]
+            # KeyError here indicates the input stream is broken.
+            return self.commitmap[committish]
         else:
-            return self.branch_map[committish]
-        
+            branch = self._getbranch(committish)
+            if branch is None:
+                raise ValueError("invalid committish: %r" % committish)
+
+            heads = self.branchmap.get(branch)
+            if heads is None:
+                return None
+            else:
+                # KeyError here indicates bad commit id in self.branchmap.
+                return self.commitmap[heads[-1]]
+
+    def _getbranch(self, ref):
+        """Translate a Git head ref to corresponding Mercurial branch
+        name.  E.g. \"refs/heads/foo\" is translated to \"foo\".
+        Special case: \"refs/heads/master\" becomes \"default\".  If
+        'ref' is not a head ref, return None.
+        """
+        prefix = "refs/heads/"
+        if ref.startswith(prefix):
+            branch = ref[len(prefix):]
+            if branch == "master":
+                return "default"
+            else:
+                return branch
+        else:
+            return None
+
     def commit_handler(self, cmd):
         # XXX this assumes the fixup branch name used by cvs2git.  In
         # contrast, git-fast-import(1) recommends "TAG_FIXUP" (not under
@@ -110,76 +238,83 @@
         fixup = (cmd.ref == "refs/heads/TAG.FIXUP")
 
         if cmd.from_:
-            first_parent = self.committish_rev(cmd.from_)
+            first_parent = cmd.from_
         else:
-            first_parent = self.branch_map.get(cmd.ref, nullrev)
+            first_parent = self._getcommit(cmd.ref) # commit object
+            if first_parent is not None:
+                first_parent = first_parent.rev     # commit id
+
         if cmd.merges:
             if len(cmd.merges) > 1:
                 raise NotImplementedError("Can't handle more than two parents")
-            second_parent = self.committish_rev(cmd.merges[0])
+            second_parent = cmd.merges[0]
         else:
-            second_parent = nullrev
+            second_parent = None
 
-        if first_parent is nullrev and second_parent is not nullrev:
+        if first_parent is None and second_parent is not None:
             # First commit on a new branch that has 'merge' but no 'from':
             # special case meaning branch starts with no files; the contents of
             # the first commit (this one) determine the list of files at branch
             # time.
             first_parent = second_parent
-            second_parent = nullrev
-            no_files = True             # XXX not handled
+            second_parent = None
+            no_files = True             # XXX this is ignored...
 
         self.ui.debug("commit %s: first_parent = %r, second_parent = %r\n"
-                      % (cmd.id, first_parent, second_parent))
+                      % (cmd, first_parent, second_parent))
         assert ((first_parent != second_parent) or
-                (first_parent == second_parent == -1)), \
+                (first_parent is second_parent is None)), \
                ("commit %s: first_parent == second parent = %r"
-                % (cmd.id, first_parent))
-
-        # Update to the first parent
-        mercurial.hg.clean(self.repo, self.repo.lookup(first_parent))
-        mercurial.commands.debugsetparents(
-            self.ui, self.repo, first_parent, second_parent)
+                % (cmd, first_parent))
 
-        if cmd.ref == "refs/heads/master":
-            branch = "default"
-        elif fixup and first_parent is not nullrev:
-            # If this is a fixup commit, pretend it happened on the same branch
-            # as its first parent.  (We don't want a Mercurial named branch
-            # called "TAG.FIXUP" in the output repository.)
-            branch = self.repo.changectx(first_parent).branch()
+        # Figure out the Mercurial branch name.
+        if fixup and first_parent is not None:
+            # If this is a fixup commit, pretend it happened on the same
+            # branch as its first parent.  (We don't want a Mercurial
+            # named branch called "TAG.FIXUP" in the output repository.)
+            branch = self.commitmap[first_parent].branch
         else:
-            branch = cmd.ref[len("refs/heads/"):]
-        self.repo.dirstate.setbranch(branch)
+            branch = self._getbranch(cmd.ref)
+
         commit_handler = HgImportCommitHandler(
-            self, cmd, self.ui, self.repo, **self.opts)
+            self, cmd, self.ui)
         commit_handler.process()
+        self.modified[cmd.id] = commit_handler.modified
+        self.filemodes[cmd.id] = commit_handler.mode
+        self.copies[cmd.id] = commit_handler.copies
 
         # in case we are converting from git or bzr, prefer author but
         # fallback to committer (committer is required, author is
         # optional)
         userinfo = cmd.author or cmd.committer
-        user = "%s <%s>" % (userinfo[0], userinfo[1])
+        if userinfo[0] == userinfo[1]:
+            # In order to conform to fastimport syntax, cvs2git with no
+            # authormap produces author names like "jsmith <jsmith>"; if
+            # we see that, revert to plain old "jsmith".
+            user = userinfo[0]
+        else:
+            user = "%s <%s>" % (userinfo[0], userinfo[1])
 
-        # Blech: have to monkeypatch mercurial.encoding to ensure that
-        # everything under rawcommit() assumes the same encoding,
-        # regardless of current locale.
-        from mercurial import encoding
-        encoding.encoding = "UTF-8"
-
-        files = commit_handler.filelist()
         assert type(cmd.message) is unicode
-        text = cmd.message.encode("utf-8") # XXX cmd.message is unicode
+        text = cmd.message.encode("utf-8")
         date = self.convert_date(userinfo)
-        node = self.repo.rawcommit(
-            files=files, text=text, user=user, date=date)
-        rev = self.repo.changelog.rev(node)
-        if cmd.mark is not None:
-            self.mark_map[":" + cmd.mark] = rev
-        if not fixup:
-            self.branch_map[cmd.ref] = rev
-            self.last_commit = cmd
-        self.ui.write("Done commit of rev %d\n" % rev)
+
+        parents = filter(None, [first_parent, second_parent])
+        commit = common.commit(user, date, text, parents, branch, rev=cmd.id)
+
+        self.commitmap[cmd.id] = commit
+        heads = self.branchmap.get(branch)
+        if heads is None:
+            heads = [cmd.id]
+        else:
+            # adding to an existing branch: replace the previous head
+            try:
+                heads.remove(first_parent)
+            except ValueError:          # first parent not a head: no problem
+                pass
+            heads.append(cmd.id)        # at end means this is tipmost
+        self.branchmap[branch] = heads
+        self.ui.debug("processed commit %s\n" % cmd)
 
     def convert_date(self, c):
         res = (int(c[2]), int(c[3]))
@@ -191,119 +326,90 @@
         return "%d %d" % res
         
     def reset_handler(self, cmd):
-        if cmd.ref.startswith("refs/heads/"):
+        tagprefix = "refs/tags/"
+        branch = self._getbranch(cmd.ref)
+        if branch:
             # The usual case for 'reset': (re)create the named branch.
             # XXX what should we do if cmd.from_ is None?
             if cmd.from_ is not None:
-                self.branch_map[cmd.ref] = self.committish_rev(cmd.from_)
+                self.branchmap[branch] = [cmd.from_]
             else:
                 # pretend the branch never existed... is this right?!?
                 try:
-                    del self.branch_map[cmd.ref]
+                    del self.branchmap[branch]
                 except KeyError:
                     pass
             #else:
             #    # XXX filename? line number?
             #    self.ui.warn("ignoring branch reset with no 'from'\n")
-        elif cmd.ref.startswith("refs/tags/"):
+        elif cmd.ref.startswith(tagprefix):
             # Create a "lightweight tag" in Git terms.  As I understand
             # it, that's a tag with no description and no history --
             # rather like CVS tags.  cvs2git turns CVS tags into Git
             # lightweight tags, so we should make sure they become
             # Mercurial tags.  But we don't have to fake a history for
             # them; save them up for the end.
-            self.lightweight_tags.append((cmd.ref, cmd.from_))
+            tag = cmd.ref[len(tagprefix):]
+            self.tags.append((tag, cmd.from_))
 
     def tag_handler(self, cmd):
         pass
 
-    def write_lightweight_tags(self):
-        if not self.lightweight_tags:   # avoid writing empty .hgtags
-            return
-
-        # XXX what about duplicate tags?  lightweight_tags is
-        # deliberately a list, to preserve order ... but do we need to
-        # worry about repeated tags?  (Certainly not for cvs2git output,
-        # since CVS has no tag history.)
-
-        # Create Mercurial tags from git-style "lightweight tags" in the
-        # input stream.
-        self.ui.status("updating tags\n")
-        mercurial.hg.clean(self.repo, self.repo.lookup("default"))
-        tagfile = open(self.repo.wjoin(".hgtags"), "ab")
-        for (ref, mark) in self.lightweight_tags:
-            tag = ref[len("refs/tags/"):]
-            rev = self.mark_map[mark]
-            node = self.repo.changelog.node(rev)
-            tagfile.write("%s %s\n" % (hex(node), tag))
-        tagfile.close()
-
-        files = [".hgtags"]
-        self.repo.rawcommit(
-            files=files, text="update tags", user="convert-repo", date=None)
 
 class HgImportCommitHandler(processor.CommitHandler):
 
-    def __init__(self, parent, command, ui, repo, **opts):
+    def __init__(self, parent, command, ui):
         self.parent = parent            # HgImportProcessor running the show
-        self.command = command
+        self.command = command          # CommitCommand that we're processing
         self.ui = ui
-        self.repo = repo
-        self.opts = opts
-        self.files = set()
+
+        # Files changed by this commit as a list of (filename, id)
+        # tuples where id is (commitid, blobid).  The blobid is
+        # needed to fetch the file's contents later, and the commitid
+        # is needed to fetch the mode.
+        # (XXX what about inline file contents?)
+        # (XXX how to describe deleted files?)
+        self.modified = []
 
-    def _make_container(self, path):
-        if '/' in path:
-            d = os.path.dirname(path)
-            if not os.path.isdir(d):
-                os.makedirs(d)
+        # mode of files listed in self.modified: '', 'x', or 'l'
+        self.mode = {}
+
+        # dictionary of src: dest (renamed files are in here and self.modified)
+        self.copies = {}
+
+        # number of inline files seen in this commit
+        self.inlinecount = 0
         
     def modify_handler(self, filecmd):
-        #print "============================" + filecmd.path
-        # FIXME: handle mode
-        self.files.add(filecmd.path)
-        fullpath = os.path.join(self.repo.root, filecmd.path)
-        self._make_container(fullpath)
-        #print "made dirs, writing file"
         if filecmd.dataref:
-            # reference to a blob that has already appeared in the stream
-            fn = self.parent.getblobfilename(filecmd.dataref)
-            if os.path.exists(fullpath):
-                os.remove(fullpath)
-            try:
-                os.link(fn, fullpath)
-            except OSError, err:
-                if err.errno == errno.ENOENT:
-                    # if this happens, it's a problem in the fast-import
-                    # stream
-                    raise util.Abort("bad blob ref %r (no such file %s)"
-                                     % (filecmd.dataref, fn))
-                else:
-                    # anything else is a bug in this extension
-                    # (cross-device move, permissions, etc.)
-                    raise
-        elif filecmd.data:
-            f = open(fullpath, "w")
-            f.write(filecmd.data)
-            f.close()
+            blobid = filecmd.dataref    # blobid is the mark of the blob
         else:
-            raise RuntimeError("either filecmd.dataref or filecmd.data must be set")
-        #print self.repo.add([filecmd.path])
-        #print "Done:", filecmd.path
+            blobid = "%s-inline:%d" % (self.command.id, self.inlinecount)
+            assert filecmd.data is not None
+            self.parent.writeblob(blobid, filecmd.data)
+            self.inlinecount += 1
+
+        fileid = (self.command.id, blobid)
+
+        self.modified.append((filecmd.path, fileid))
+        if filecmd.mode.endswith("644"): # normal file
+            mode = ''
+        elif filecmd.mode.endswith("755"): # executable
+            mode = 'x'
+        elif filecmd.mode == "120000":  # symlink
+            mode = 'l'
+        else:
+            raise RuntimeError("mode %r unsupported" % filecmd.mode)
+
+        self.mode[filecmd.path] = mode
 
     def delete_handler(self, filecmd):
-        self.files.add(filecmd.path)
-        self.repo.remove([filecmd.path], unlink=True)
+        self.modified.append((filecmd.path, None))
 
-    #def copy_handler(self, filecmd):
-    #    self.files.add(filecmd.path)
-    #    """Handle a filecopy command."""
-    #    self.ui.write("Cmd: %s\n" % repr(filecmd))
+    def copy_handler(self, filecmd):
+        self.copies[filecmd.src_path] = filecmd.dest_path
 
-    #def rename_handler(self, filecmd):
-    #    self.files.add(filecmd.path)
-    #    """Handle a filerename command."""
-    #    self.ui.write("Cmd: %s\n" % repr(filecmd))
-
-    def filelist(self):
-        return list(self.files)
+    def rename_handler(self, filecmd):
+        # copy oldname to newname, then record oldname as deleted (id None)
+        self.copies[filecmd.oldname] = filecmd.newname
+        self.modified.append((filecmd.oldname, None))
--- a/tests/test-fastimport-anonbranch	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-anonbranch	Sat May 16 12:57:22 2009 -0400
@@ -108,7 +108,7 @@
 rm -rf anonbranch
 hg init anonbranch
 cd anonbranch
-hg fastimport ../anonbranch.dump
+hg fastimport ../anonbranch.dump | sed "s|$HGTMP|HGTMP|g"
 
 echo "% log"
 hg glog --template "{rev}:{node|short}\n{desc}\n\n"
--- a/tests/test-fastimport-anonbranch.out	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-anonbranch.out	Sat May 16 12:57:22 2009 -0400
@@ -1,39 +1,35 @@
 % import
-Reading source: ../anonbranch.dump
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
-2 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 1
-1 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 2
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 3
-2 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 4
-1 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 5
-2 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 6
+scanning source...
+sorting...
+converting...
+6 initial revision
+5 modify foo, add bla
+4 add oog
+3 modify bla
+2 remove oog
+1 modify foo
+0 merge
+Removing blob dir 'HGTMP/test-fastimport-anonbranch/anonbranch/.hg/blobs' ...
 % log
-@    6:c454087588c4
+o    6:ca2d9acb975f
 |\   merge
 | |
-| o  5:d7502914e05c
+| o  5:426c5bd79694
 | |  modify foo
 | |
-o |  4:5dde14f70320
+o |  4:32f018ede0f8
 | |  remove oog
 | |
-| o  3:66562011aee8
+| o  3:7c78b201cf49
 | |  modify bla
 | |
-o |  2:3851fabfd46c
+o |  2:b4dd3ea3dee1
 |/   add oog
 |
-o  1:c09a5b11c17e
+o  1:60da203876bc
 |  modify foo, add bla
 |
-o  0:67886358f17d
+o  0:2eab317db71a
    initial revision
 
 % manifest at rev 0
@@ -56,14 +52,14 @@
 % contents of bla at rev 6
 blah blah
 % diff merge with first parent (rev 4:6)
-diff -r 5dde14f70320 -r c454087588c4 bla
+diff -r 32f018ede0f8 -r ca2d9acb975f bla
 --- a/bla	Thu Jan 10 21:23:00 2008 +0000
 +++ b/bla	Thu Jan 10 21:24:00 2008 +0000
 @@ -1,1 +1,1 @@
 -bla
 +blah blah
 % diff merge with second parent (rev 5:6)
-diff -r d7502914e05c -r c454087588c4 foo
+diff -r 426c5bd79694 -r ca2d9acb975f foo
 --- a/foo	Thu Jan 10 21:23:30 2008 +0000
 +++ b/foo	Thu Jan 10 21:24:00 2008 +0000
 @@ -1,1 +1,1 @@
--- a/tests/test-fastimport-cvs2git-fixup.out	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-cvs2git-fixup.out	Sat May 16 12:57:22 2009 -0400
@@ -1,20 +1,16 @@
 ----------------------------------------
 % import git-dump-1.dat
-Reading source: git-blob.dat
-Reading source: git-dump-1.dat
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
-1 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 1
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 2
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 3
+scanning source...
+sorting...
+converting...
+3 added Makefile
+2 create branch 'REL-2-2-3' (manufactured commit)
+1 create tag 'REL-2-2-3-P1' (manufactured commit)
+0 added iostream.h
 updating tags
-1 files updated, 0 files merged, 0 files removed, 0 files unresolved
 Removing blob dir 'HGTMP/test-fastimport-cvs2git-fixup/realcvs.1/.hg/blobs' ...
 % hg glog (git-dump-1.dat)
-@  rev:    4
+o  rev:    4
 |  author: convert-repo
 |  branch: ''  tags:   tip
 |  files:  .hgtags
@@ -27,13 +23,13 @@
 |  desc:   added iostream.h
 |
 | o  rev:    2
-| |  author: cvs2git <cvs2git>
+| |  author: cvs2git
 | |  branch: 'REL-2-2-3'  tags:   REL-2-2-3-P1
 | |  files:  Tools/Debug/C++/DebugCpp.doxygen
 | |  desc:   create tag 'REL-2-2-3-P1' (manufactured commit)
 | |
 | o  rev:    1
-|/   author: cvs2git <cvs2git>
+|/   author: cvs2git
 |    branch: 'REL-2-2-3'  tags:
 |    files:
 |    desc:   create branch 'REL-2-2-3' (manufactured commit)
@@ -45,7 +41,7 @@
    desc:   added Makefile
 
 % hg branches (git-dump-1.dat)
-REL-2-2-3                      2:9c706dffba0e
+REL-2-2-3                      2:b4709b6ae328
 % hg heads -v (git-dump-1.dat)
 rev:    4
 author: convert-repo
@@ -54,30 +50,26 @@
 desc:   update tags
 
 rev:    2
-author: cvs2git <cvs2git>
+author: cvs2git
 branch: 'REL-2-2-3'  tags:   REL-2-2-3-P1
 files:  Tools/Debug/C++/DebugCpp.doxygen
 desc:   create tag 'REL-2-2-3-P1' (manufactured commit)
 
 % hg tags -v (git-dump-1.dat)
-REL-2-2-3-P1                       2:9c706dffba0e
+REL-2-2-3-P1                       2:b4709b6ae328
 ----------------------------------------
 % import git-dump-2.dat
-Reading source: git-blob.dat
-Reading source: git-dump-2.dat
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
-1 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 1
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 2
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 3
+scanning source...
+sorting...
+converting...
+3 added Makefile
+2 create branch 'REL-2-2-3' (manufactured commit)
+1 create tag 'REL-2-2-3-P1' (manufactured commit)
+0 added iostream.h
 updating tags
-1 files updated, 0 files merged, 0 files removed, 0 files unresolved
 Removing blob dir 'HGTMP/test-fastimport-cvs2git-fixup/realcvs.2/.hg/blobs' ...
 % hg glog (git-dump-2.dat)
-@  rev:    4
+o  rev:    4
 |  author: convert-repo
 |  branch: ''  tags:   tip
 |  files:  .hgtags
@@ -90,13 +82,13 @@
 |  desc:   added iostream.h
 |
 | o  rev:    2
-| |  author: cvs2git <cvs2git>
+| |  author: cvs2git
 | |  branch: 'REL-2-2-3'  tags:   REL-2-2-3-P1
 | |  files:  Tools/Debug/C++/DebugCpp.doxygen
 | |  desc:   create tag 'REL-2-2-3-P1' (manufactured commit)
 | |
 | o  rev:    1
-|/   author: cvs2git <cvs2git>
+|/   author: cvs2git
 |    branch: 'REL-2-2-3'  tags:
 |    files:
 |    desc:   create branch 'REL-2-2-3' (manufactured commit)
@@ -108,7 +100,7 @@
    desc:   added Makefile
 
 % hg branches (git-dump-2.dat)
-REL-2-2-3                      2:9c706dffba0e
+REL-2-2-3                      2:b4709b6ae328
 % hg heads -v (git-dump-2.dat)
 rev:    4
 author: convert-repo
@@ -117,10 +109,10 @@
 desc:   update tags
 
 rev:    2
-author: cvs2git <cvs2git>
+author: cvs2git
 branch: 'REL-2-2-3'  tags:   REL-2-2-3-P1
 files:  Tools/Debug/C++/DebugCpp.doxygen
 desc:   create tag 'REL-2-2-3-P1' (manufactured commit)
 
 % hg tags -v (git-dump-2.dat)
-REL-2-2-3-P1                       2:9c706dffba0e
+REL-2-2-3-P1                       2:b4709b6ae328
--- a/tests/test-fastimport-cvs2git.out	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-cvs2git.out	Sat May 16 12:57:22 2009 -0400
@@ -1,23 +1,22 @@
 % import cvs2git dumps
-Reading source: git-blob.dat
-Reading source: git-dump.dat
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
-2 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 1
+scanning source...
+sorting...
+converting...
+1 initial revision
+0 modify
 Removing blob dir 'HGTMP/test-fastimport-cvs2git/simplecvs/.hg/blobs' ...
 % log
-@  changeset:   1:be20ff2863a5
+o  changeset:   1:f5f5b99690f9
 |  tag:         tip
-|  user:        example <example>
+|  user:        example
 |  date:        Thu Jan 10 21:21:00 2008 +0000
 |  files:       ooga
 |  description:
 |  modify
 |
 |
-o  changeset:   0:5bb45e163687
-   user:        example <example>
+o  changeset:   0:602b297b695e
+   user:        example
    date:        Thu Jan 10 21:20:00 2008 +0000
    files:       foo.txt ooga
    description:
--- a/tests/test-fastimport-empty.out	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-empty.out	Sat May 16 12:57:22 2009 -0400
@@ -1,1 +1,3 @@
-Reading source: /dev/null
+scanning source...
+sorting...
+converting...
--- a/tests/test-fastimport-git.out	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-git.out	Sat May 16 12:57:22 2009 -0400
@@ -1,12 +1,12 @@
 % import simple dump from git
-Reading source: simplegit.dump
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
-2 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 1
+scanning source...
+sorting...
+converting...
+1 initial revision
+0 modify
 Removing blob dir 'HGTMP/test-fastimport-git/simplegit/.hg/blobs' ...
 % hg log
-@  changeset:   1:f5fb7ed5752d
+o  changeset:   1:0a3befda043d
 |  tag:         tip
 |  user:        Joe Contributor <joe@example.com>
 |  date:        Fri Jan 11 01:20:00 2008 +0400
@@ -15,7 +15,7 @@
 |  modify
 |
 |
-o  changeset:   0:56c9f26e6c12
+o  changeset:   0:9a4b81675bd1
    user:        Example <example@example.org>
    date:        Fri Jan 11 01:21:00 2008 +0400
    files:       foo.txt ooga
--- a/tests/test-fastimport-nonascii	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-nonascii	Sat May 16 12:57:22 2009 -0400
@@ -17,13 +17,18 @@
 
 __EOF__
 
+# N.B. it's important to run fastimport without overriding HGENCODING,
+# because we need to be sure that UTF-8 metadata is correctly converted
+# even when the default encoding is ASCII.  The catch is that the
+# converted commit messages will be mangled as they are printed to
+# stdout.  That's OK.  The real test is when we run hg log, and there we
+# set HGENCODING to ensure that hg accurately prints what's in the
+# repository.
+
 hg init committer
 cd committer
 hg --traceback fastimport ../committer.dump
 echo "% log with non-ASCII committer"
-
-# Overide HGENCODING (it is set to 'ascii' by run-tests.py) to ensure
-# log is accurately printed.
 HGENCODING=utf-8 hg log
 cd ..
 
@@ -62,7 +67,7 @@
 
 hg init message
 cd message
-hg --traceback fastimport ../message.dump
+hg --traceback fastimport ../message.dump | sed "s|$HGTMP|HGTMP|g"
 echo "% log with non-ASCII message"
 HGENCODING=utf-8 hg log
 echo "% manifest"
--- a/tests/test-fastimport-nonascii.out	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-nonascii.out	Sat May 16 12:57:22 2009 -0400
@@ -1,31 +1,35 @@
 % import dump with non-ASCII committer
-Reading source: ../committer.dump
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
+scanning source...
+sorting...
+converting...
+0 foo
 % log with non-ASCII committer
-changeset:   0:d06142abc8ab
+changeset:   0:f44a7ebfef4b
 tag:         tip
 user:        Jean-François <jf@example.com>
 date:        Fri Nov 30 14:58:10 2001 +0000
 summary:     foo
 
 % import dump with non-ASCII author
-Reading source: ../author.dump
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
+scanning source...
+sorting...
+converting...
+0 blah
 % log with non-ASCII author
-changeset:   0:9a86e5e78bae
+changeset:   0:e10eff2ea202
 tag:         tip
 user:        Jürgen <juergen@example.org>
 date:        Tue Aug 11 10:13:20 2009 -0400
 summary:     blah
 
 % import dump with non-ASCII message
-Reading source: ../message.dump
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
+scanning source...
+sorting...
+converting...
+0 fix na?ve implementation that ?le threw together for ?5
+Removing blob dir 'HGTMP/test-fastimport-nonascii/message/.hg/blobs' ...
 % log with non-ASCII message
-changeset:   0:0700c36eef88
+changeset:   0:cfd47dca9906
 tag:         tip
 user:        Dave <dave@example.org>
 date:        Tue Aug 11 10:13:50 2009 -0400
--- a/tests/test-fastimport-simple	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-simple	Sat May 16 12:57:22 2009 -0400
@@ -35,7 +35,7 @@
 echo "% importing"
 hg init simple
 cd simple
-hg fastimport ../simple.dump
+hg fastimport ../simple.dump | sed "s|$HGTMP|HGTMP|g"
 
 # It's OK for the log to include changeset IDs, since the fastimport
 # dump includes everything used to compute them, so they should be the
--- a/tests/test-fastimport-simple.out	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-simple.out	Sat May 16 12:57:22 2009 -0400
@@ -1,17 +1,18 @@
 % importing
-Reading source: ../simple.dump
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
-2 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 1
+scanning source...
+sorting...
+converting...
+1 initial revision
+0 add a line
+Removing blob dir 'HGTMP/test-fastimport-simple/simple/.hg/blobs' ...
 % hg log
-changeset:   1:494d62fb22a9
+changeset:   1:777dd8cf7297
 tag:         tip
 user:        Example <example@example.org>
 date:        Thu Jan 10 21:20:01 2008 +0000
 summary:     add a line
 
-changeset:   0:43689aeeccc7
+changeset:   0:1e4114af38f1
 user:        Example <example@example.org>
 date:        Thu Jan 10 21:20:00 2008 +0000
 summary:     initial revision
--- a/tests/test-fastimport-syntax.out	Sun May 10 14:16:02 2009 -0400
+++ b/tests/test-fastimport-syntax.out	Sat May 16 12:57:22 2009 -0400
@@ -1,29 +1,28 @@
 % dump with no blank line after merge commit
-Reading source: ../test1.dump
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 1
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 2
-@    2:feeb61781acb
+scanning source...
+sorting...
+converting...
+2 
+1 
+0 
+o    2:0c314954ac3f
 |\
 | |
-| o  1:4ebe0e085b60
+| o  1:1c1c7986a7e1
 |/
 |
-o  0:39327e07c83d
+o  0:ca38501957af
 
 
 % dump with redundant 'from' directive
-Reading source: ../test2.dump
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 0
-0 files updated, 0 files merged, 0 files removed, 0 files unresolved
-Done commit of rev 1
-@  1:4ebe0e085b60
+scanning source...
+sorting...
+converting...
+1 
+0 
+o  1:1c1c7986a7e1
 |
 |
-o  0:39327e07c83d
+o  0:ca38501957af