view hgfastimport/hgimport.py @ 46:93c2b1e832bd

Remove commented-out cruft.
author Greg Ward <greg-hg@gerg.ca>
date Sun, 10 May 2009 14:16:02 -0400
parents 61ff7b929cea
children 7ff36dc9f0b1
line wrap: on
line source

# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Processor of import commands.

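This module provides HgImportProcessor, which builds on
processor.ImportProcessor from the fastimport package to turn fast-import
commands into Mercurial changesets, and HgImportCommitHandler, which
applies each commit's file commands to the working directory.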
"""

import os
import os.path
import errno
import shutil

import mercurial.hg
import mercurial.commands
from mercurial import util
from mercurial.node import nullrev, hex

from fastimport import processor
from hgfastimport import hgechoprocessor

class HgImportProcessor(processor.ImportProcessor):
    """Import processor that writes fast-import commands to a Mercurial repo.

    Blob data is spooled to files under ".hg/blobs" in the repository,
    every 'commit' command becomes one changeset created with
    rawcommit(), and lightweight tags ('reset refs/tags/...') are
    collected and written to .hgtags in a single changeset by
    teardown().
    """

    def __init__(self, ui, repo, **opts):
        """Remember ui, repo and opts, and start with empty import state."""
        super(HgImportProcessor, self).__init__()
        self.ui = ui
        self.repo = repo
        self.opts = opts
        self.last_commit = None         # CommitCommand object
        self.mark_map = {}              # map mark (e.g. ":1") to revision number
        self.branch_map = {}            # map git branch name to revision number
        self.lightweight_tags = []      # list of (ref, committish) tuples

        self.numblobs = 0               # for progress reporting
        self.blobdir = None

    def setup(self):
        """Setup before processing any streams."""
        pass

    def teardown(self):
        """Cleanup after processing all streams."""
        # Hmmm: this isn't really a cleanup step, it's a post-processing
        # step.  But we currently have one processor per input
        # stream... despite the fact that state like mark_map,
        # branch_map, and lightweight_tags really should span input
        # streams.
        self.write_lightweight_tags()

        if self.blobdir and os.path.exists(self.blobdir):
            self.ui.status("Removing blob dir %r ...\n" % self.blobdir)
            shutil.rmtree(self.blobdir)

    def progress_handler(self, cmd):
        """Echo the message of a 'progress' command to the user."""
        self.ui.write("Progress: %s\n" % cmd.message)

    def blob_handler(self, cmd):
        """Save the data of a 'blob' command to a file in the blob dir.

        The blob directory is created when the first blob is seen and is
        removed again by teardown().
        """
        if self.blobdir is None:        # no blobs seen yet
            # XXX cleanup?
            self.blobdir = os.path.join(self.repo.root, ".hg", "blobs")
            os.mkdir(self.blobdir)

        fn = self.getblobfilename(cmd.id)
        blobfile = open(fn, "wb")
        try:
            blobfile.write(cmd.data)
        finally:
            # don't leak the file handle if the write fails
            blobfile.close()

        self.numblobs += 1
        if self.numblobs % 500 == 0:
            self.ui.status("%d blobs read\n" % self.numblobs)

    def getblobfilename(self, blobid):
        """Return the name of the file that holds blob blobid.

        Raises RuntimeError if no blob directory exists yet, i.e. no
        blob has been seen so far.
        """
        if self.blobdir is None:
            raise RuntimeError("no blobs seen, so no blob directory created")
        # XXX should escape ":" for windows
        return os.path.join(self.blobdir, "blob-" + blobid)

    def checkpoint_handler(self, cmd):
        """Ignore 'checkpoint' commands."""
        # This command means nothing to us
        pass

    def committish_rev(self, committish):
        """Return the revision number that committish refers to.

        A committish starting with ":" is looked up in mark_map,
        anything else in branch_map.  Raises KeyError if it is unknown.
        """
        if committish.startswith(":"):
            return self.mark_map[committish]
        else:
            return self.branch_map[committish]

    def commit_handler(self, cmd):
        """Create a Mercurial changeset from a 'commit' command.

        Works out the parent revisions, updates the working directory
        to the first parent, lets an HgImportCommitHandler apply the
        file commands, commits the result, and records the new revision
        in mark_map and (unless this is a fixup commit) branch_map.

        Raises NotImplementedError if the commit has more than one
        'merge' parent.
        """
        # XXX this assumes the fixup branch name used by cvs2git.  In
        # contrast, git-fast-import(1) recommends "TAG_FIXUP" (not under
        # refs/heads), and implies that it can be called whatever the
        # creator of the fastimport dump wants to call it.  So the name
        # of the fixup branch should be configurable!
        fixup = (cmd.ref == "refs/heads/TAG.FIXUP")

        if cmd.from_:
            first_parent = self.committish_rev(cmd.from_)
        else:
            first_parent = self.branch_map.get(cmd.ref, nullrev)
        if cmd.merges:
            if len(cmd.merges) > 1:
                raise NotImplementedError("Can't handle more than two parents")
            second_parent = self.committish_rev(cmd.merges[0])
        else:
            second_parent = nullrev

        # Revision numbers are plain ints, so compare them by value:
        # identity comparison only happens to work for small ints.
        if first_parent == nullrev and second_parent != nullrev:
            # First commit on a new branch that has 'merge' but no 'from':
            # special case meaning branch starts with no files; the contents of
            # the first commit (this one) determine the list of files at branch
            # time.
            # XXX the "starts with no files" part is not handled: the
            # commit simply becomes a child of the merge parent.
            first_parent = second_parent
            second_parent = nullrev

        self.ui.debug("commit %s: first_parent = %r, second_parent = %r\n"
                      % (cmd.id, first_parent, second_parent))
        assert ((first_parent != second_parent) or
                (first_parent == second_parent == nullrev)), \
               ("commit %s: first_parent == second parent = %r"
                % (cmd.id, first_parent))

        # Update to the first parent
        mercurial.hg.clean(self.repo, self.repo.lookup(first_parent))
        mercurial.commands.debugsetparents(
            self.ui, self.repo, first_parent, second_parent)

        if cmd.ref == "refs/heads/master":
            branch = "default"
        elif fixup and first_parent != nullrev:
            # If this is a fixup commit, pretend it happened on the same branch
            # as its first parent.  (We don't want a Mercurial named branch
            # called "TAG.FIXUP" in the output repository.)
            branch = self.repo.changectx(first_parent).branch()
        else:
            branch = cmd.ref[len("refs/heads/"):]
        self.repo.dirstate.setbranch(branch)
        commit_handler = HgImportCommitHandler(
            self, cmd, self.ui, self.repo, **self.opts)
        commit_handler.process()

        # in case we are converting from git or bzr, prefer author but
        # fallback to committer (committer is required, author is
        # optional)
        userinfo = cmd.author or cmd.committer
        user = "%s <%s>" % (userinfo[0], userinfo[1])

        # Blech: have to monkeypatch mercurial.encoding to ensure that
        # everything under rawcommit() assumes the same encoding,
        # regardless of current locale.
        from mercurial import encoding
        encoding.encoding = "UTF-8"

        files = commit_handler.filelist()
        assert isinstance(cmd.message, unicode)
        text = cmd.message.encode("utf-8") # XXX cmd.message is unicode
        date = self.convert_date(userinfo)
        node = self.repo.rawcommit(
            files=files, text=text, user=user, date=date)
        rev = self.repo.changelog.rev(node)
        if cmd.mark is not None:
            self.mark_map[":" + cmd.mark] = rev
        if not fixup:
            self.branch_map[cmd.ref] = rev
            self.last_commit = cmd
        self.ui.write("Done commit of rev %d\n" % rev)

    def convert_date(self, c):
        """Return items 2 and 3 of c as a "<int> <int>" date string.

        c is the author/committer tuple of a commit command; items 2
        and 3 are presumably the timestamp and the timezone offset
        (TODO confirm against the fastimport parser).
        """
        return "%d %d" % (int(c[2]), int(c[3]))

    def reset_handler(self, cmd):
        """Handle a 'reset' command for a branch or a lightweight tag.

        Resets of refs outside refs/heads/ and refs/tags/ are ignored.
        """
        if cmd.ref.startswith("refs/heads/"):
            # The usual case for 'reset': (re)create the named branch.
            # XXX what should we do if cmd.from_ is None?
            if cmd.from_ is not None:
                self.branch_map[cmd.ref] = self.committish_rev(cmd.from_)
            else:
                # pretend the branch never existed... is this right?!?
                self.branch_map.pop(cmd.ref, None)
        elif cmd.ref.startswith("refs/tags/"):
            # Create a "lightweight tag" in Git terms.  As I understand
            # it, that's a tag with no description and no history --
            # rather like CVS tags.  cvs2git turns CVS tags into Git
            # lightweight tags, so we should make sure they become
            # Mercurial tags.  But we don't have to fake a history for
            # them; save them up for the end.
            if cmd.from_ is None:
                # Nothing to point the tag at; queueing it would only
                # blow up in teardown() after the whole import has run.
                # XXX filename? line number?
                self.ui.warn("ignoring tag reset with no 'from': %s\n"
                             % cmd.ref)
                return
            self.lightweight_tags.append((cmd.ref, cmd.from_))

    def tag_handler(self, cmd):
        """Ignore 'tag' commands; only lightweight tags are imported."""
        pass

    def write_lightweight_tags(self):
        """Append the collected lightweight tags to .hgtags and commit it.

        Does nothing if no lightweight tags were seen.  Raises KeyError
        if a tag refers to an unknown mark or branch.
        """
        if not self.lightweight_tags:   # avoid writing empty .hgtags
            return

        # XXX what about duplicate tags?  lightweight_tags is
        # deliberately a list, to preserve order ... but do we need to
        # worry about repeated tags?  (Certainly not for cvs2git output,
        # since CVS has no tag history.)

        # Create Mercurial tags from git-style "lightweight tags" in the
        # input stream.
        self.ui.status("updating tags\n")
        mercurial.hg.clean(self.repo, self.repo.lookup("default"))
        tagfile = open(self.repo.wjoin(".hgtags"), "ab")
        try:
            for (ref, committish) in self.lightweight_tags:
                tag = ref[len("refs/tags/"):]
                # the tag target may be a mark or a branch name
                rev = self.committish_rev(committish)
                node = self.repo.changelog.node(rev)
                tagfile.write("%s %s\n" % (hex(node), tag))
        finally:
            tagfile.close()

        files = [".hgtags"]
        self.repo.rawcommit(
            files=files, text="update tags", user="convert-repo", date=None)

class HgImportCommitHandler(processor.CommitHandler):
    """Apply the file commands of one 'commit' command to the working dir.

    The paths of all files touched are collected so that the caller can
    pass them to rawcommit() (see filelist()).
    """

    def __init__(self, parent, command, ui, repo, **opts):
        """Remember the collaborators and start with no touched files."""
        self.parent = parent            # HgImportProcessor running the show
        self.command = command
        self.ui = ui
        self.repo = repo
        self.opts = opts
        self.files = set()

    def _make_container(self, path):
        """Create the directory that contains path if it is missing."""
        if '/' in path:
            d = os.path.dirname(path)
            if not os.path.isdir(d):
                os.makedirs(d)

    def modify_handler(self, filecmd):
        """Write the new content of filecmd.path into the working dir.

        The content is either a previously seen blob (filecmd.dataref),
        which is hard-linked into place, or inline data (filecmd.data),
        which is written out directly.  Inline data may be empty.

        Raises util.Abort if the referenced blob file does not exist and
        RuntimeError if the command carries neither a blob reference nor
        inline data.
        """
        # FIXME: handle mode
        self.files.add(filecmd.path)
        fullpath = os.path.join(self.repo.root, filecmd.path)
        self._make_container(fullpath)
        if filecmd.dataref:
            # reference to a blob that has already appeared in the stream
            fn = self.parent.getblobfilename(filecmd.dataref)
            if os.path.exists(fullpath):
                os.remove(fullpath)
            try:
                os.link(fn, fullpath)
            except OSError as err:
                if err.errno == errno.ENOENT:
                    # if this happens, it's a problem in the fast-import
                    # stream
                    raise util.Abort("bad blob ref %r (no such file %s)"
                                     % (filecmd.dataref, fn))
                else:
                    # anything else is a bug in this extension
                    # (cross-device move, permissions, etc.)
                    raise
        elif filecmd.data is not None:
            # Inline data; an empty string is a legitimate (empty) file.
            # The existing file may be a hard link to a spooled blob:
            # remove it first so that we don't overwrite the blob too.
            if os.path.exists(fullpath):
                os.remove(fullpath)
            # binary mode, just like the blob files
            f = open(fullpath, "wb")
            try:
                f.write(filecmd.data)
            finally:
                f.close()
        else:
            raise RuntimeError("either filecmd.dataref or filecmd.data must be set")

    def delete_handler(self, filecmd):
        """Remove filecmd.path from the repository and the working dir."""
        self.files.add(filecmd.path)
        self.repo.remove([filecmd.path], unlink=True)

    # XXX copy_handler and rename_handler are not overridden here, so
    # file copy and file rename commands are not handled by this class.

    def filelist(self):
        """Return a list of the paths touched by this commit."""
        return list(self.files)