Mercurial > hg > hg-fastimport
changeset 33:31b9a5805f02
Rename the extension's package from 'fastimport' to 'hgfastimport'.
| author | Greg Ward <greg-hg@gerg.ca> |
|---|---|
| date | Tue, 05 May 2009 10:27:27 -0400 |
| parents | 8a92919bcd16 |
| children | 08e2157aaa9a |
| files | README.txt fastimport/__init__.py fastimport/commands.py fastimport/dates.py fastimport/errors.py fastimport/hgechoprocessor.py fastimport/hgimport.py fastimport/parser.py fastimport/processor.py hgfastimport/__init__.py hgfastimport/commands.py hgfastimport/dates.py hgfastimport/errors.py hgfastimport/hgechoprocessor.py hgfastimport/hgimport.py hgfastimport/parser.py hgfastimport/processor.py tests/fastimport-common |
| diffstat | 18 files changed, 1463 insertions(+), 1463 deletions(-) [+] |
line wrap: on
line diff
--- a/README.txt Mon May 04 19:38:20 2009 -0400 +++ b/README.txt Tue May 05 10:27:27 2009 -0400 @@ -28,7 +28,7 @@ To use hg-fastimport, add a line like - fastimport = /path/to/hg-fastimport/fastimport + fastimport = /path/to/hg-fastimport/hgfastimport to the [extensions] section of your hgrc.
--- a/fastimport/__init__.py Mon May 04 19:38:20 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -from mercurial import commands - -import parser -import hgechoprocessor -import hgimport - -def fastimport(ui, repo, *sources, **opts): - proc = hgimport.HgImportProcessor(ui, repo, **opts) - #proc = hgechoprocessor.HgEchoProcessor(ui, repo, **opts) - proc.setup() - try: - for source in sources: - ui.write("Reading source: %s\n" % source) - f = open(source) - p = parser.ImportParser(f) - proc._process(p.iter_commands) - f.close() - finally: - proc.teardown() - -cmdtable = { - "fastimport": - (fastimport, - [], - 'hg fastimport SOURCE ...') -}
--- a/fastimport/commands.py Mon May 04 19:38:20 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,216 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Import command classes.""" - - -# Lists of command names -COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag'] -FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename', - 'filedeleteall'] - -# Bazaar file kinds -FILE_KIND = 'file' -SYMLINK_KIND = 'symlink' - - -class ImportCommand(object): - """Base class for import commands.""" - - def __init__(self, name): - self.name = name - # List of field names not to display - self._binary = [] - - def __repr__(self): - return "<%s at %x: %s>" % (self.__class__.__name__, id(self), self) - - def __str__(self): - return self.name - - def dump_str(self, names=None, child_lists=None, verbose=False): - """Dump fields as a string. 
- - :param names: the list of fields to include or - None for all public fields - :param child_lists: dictionary of child command names to - fields for that child command to include - :param verbose: if True, prefix each line with the command class and - display fields as a dictionary; if False, dump just the field - values with tabs between them - """ - interesting = {} - if names is None: - fields = [k for k in self.__dict__.keys() if not k.startswith('_')] - else: - fields = names - for field in fields: - value = self.__dict__.get(field) - if field in self._binary and value is not None: - value = '(...)' - interesting[field] = value - if verbose: - return "%s: %s" % (self.__class__.__name__, interesting) - else: - return "\t".join([str(interesting[k]) for k in fields]) - - -class BlobCommand(ImportCommand): - - def __init__(self, mark, data, lineno=0): - ImportCommand.__init__(self, 'blob') - self.mark = mark - self.data = data - self.lineno = lineno - # Provide a unique id in case the mark is missing - if mark is None: - self.id = '@%d' % lineno - else: - self.id = ':' + mark - self._binary = ['data'] - - def __str__(self): - return self.id - - -class CheckpointCommand(ImportCommand): - - def __init__(self): - ImportCommand.__init__(self, 'checkpoint') - - -class CommitCommand(ImportCommand): - - def __init__(self, ref, mark, author, committer, message, from_, - parents, file_iter, lineno=0): - ImportCommand.__init__(self, 'commit') - self.ref = ref - self.mark = mark - self.author = author - self.committer = committer - self.message = message - self.from_ = from_ - self.parents = parents - self.file_iter = file_iter - self.lineno = lineno - self._binary = ['file_iter'] - # Provide a unique id in case the mark is missing - if mark is None: - self.id = '@%d' % lineno - else: - self.id = ':' + mark - - def __str__(self): - return "ref %s, mark %s" % (self.ref, self.mark) - - def dump_str(self, names=None, child_lists=None, verbose=False): - result = 
[ImportCommand.dump_str(self, names, verbose=verbose)] - for f in self.file_iter(): - if child_lists is None: - continue - try: - child_names = child_lists[f.name] - except KeyError: - continue - result.append("\t%s" % f.dump_str(child_names, verbose=verbose)) - return '\n'.join(result) - - -class ProgressCommand(ImportCommand): - - def __init__(self, message): - ImportCommand.__init__(self, 'progress') - self.message = message - - -class ResetCommand(ImportCommand): - - def __init__(self, ref, from_): - ImportCommand.__init__(self, 'reset') - self.ref = ref - self.from_ = from_ - - -class TagCommand(ImportCommand): - - def __init__(self, id, from_, tagger, message): - ImportCommand.__init__(self, 'tag') - self.id = id - self.from_ = from_ - self.tagger = tagger - self.message = message - - def __str__(self): - return self.id - - -class FileCommand(ImportCommand): - """Base class for file commands.""" - pass - - -class FileModifyCommand(FileCommand): - - def __init__(self, path, kind, is_executable, dataref, data): - # Either dataref or data should be null - FileCommand.__init__(self, 'filemodify') - self.path = path - self.kind = kind - self.is_executable = is_executable - self.dataref = dataref - self.data = data - self._binary = ['data'] - - def __str__(self): - return self.path - - -class FileDeleteCommand(FileCommand): - - def __init__(self, path): - FileCommand.__init__(self, 'filedelete') - self.path = path - - def __str__(self): - return self.path - - -class FileCopyCommand(FileCommand): - - def __init__(self, src_path, dest_path): - FileCommand.__init__(self, 'filecopy') - self.src_path = src_path - self.dest_path = dest_path - - def __str__(self): - return "%s -> %s" % (self.src_path, self.dest_path) - - -class FileRenameCommand(FileCommand): - - def __init__(self, old_path, new_path): - FileCommand.__init__(self, 'filerename') - self.old_path = old_path - self.new_path = new_path - - def __str__(self): - return "%s -> %s" % (self.old_path, self.new_path) 
- - -class FileDeleteAllCommand(FileCommand): - - def __init__(self): - FileCommand.__init__(self, 'filedeleteall')
--- a/fastimport/dates.py Mon May 04 19:38:20 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Date parsing routines. - -Each routine returns timestamp,timezone where - -* timestamp is seconds since epoch -* timezone is the offset from UTC in seconds. -""" - - -import time - - -def parse_raw(s): - """Parse a date from a raw string. - - The format must be exactly "seconds-since-epoch offset-utc". - See the spec for details. - """ - timestamp_str, timezone_str = s.split(' ', 1) - timestamp = float(timestamp_str) - timezone = _parse_tz(timezone_str) - return timestamp, timezone - - -def _parse_tz(tz): - """Parse a timezone specification in the [+|-]HHMM format. - - :return: the timezone offset in seconds. - """ - # from git_repository.py in bzr-git - assert len(tz) == 5 - sign = {'+': +1, '-': -1}[tz[0]] - hours = int(tz[1:3]) - minutes = int(tz[3:]) - return sign * 60 * (60 * hours + minutes) - - -def parse_rfc2822(s): - """Parse a date from a rfc2822 string. - - See the spec for details. - """ - raise NotImplementedError(parse_rfc2822) - - -def parse_now(s): - """Parse a date from a string. - - The format must be exactly "now". - See the spec for details. 
- """ - return time.time(), 0 - - -# Lookup tabel of date parsing routines -DATE_PARSERS_BY_NAME = { - 'raw': parse_raw, - 'rfc2822': parse_rfc2822, - 'now': parse_now, - }
--- a/fastimport/errors.py Mon May 04 19:38:20 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,171 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Exception classes for fastimport""" - -#from bzrlib import errors as bzr_errors - - -# Prefix to messages to show location information -_LOCATION_FMT = "line %(lineno)d: " - -class FmtException(StandardError): - def __str__(self): - return repr(self) - - def __repr__(self): - return self._fmt % self.__dict__ - -class ImportError(FmtException): - """The base exception class for all import processing exceptions.""" - - _fmt = "Unknown Import Error" - - -class ParsingError(ImportError): - """The base exception class for all import processing exceptions.""" - - _fmt = _LOCATION_FMT + "Unknown Import Parsing Error" - - def __init__(self, lineno): - ImportError.__init__(self) - self.lineno = lineno - - -class MissingBytes(ParsingError): - """Raised when EOF encountered while expecting to find more bytes.""" - - _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes," - " found %(found)d") - - def __init__(self, lineno, expected, found): - ParsingError.__init__(self, lineno) - self.expected = expected - self.found = found - - -class MissingTerminator(ParsingError): - """Raised when EOF encountered 
while expecting to find a terminator.""" - - _fmt = (_LOCATION_FMT + - "Unexpected EOF - expected '%(terminator)s' terminator") - - def __init__(self, lineno, terminator): - ParsingError.__init__(self, lineno) - self.terminator = terminator - - -class InvalidCommand(ParsingError): - """Raised when an unknown command found.""" - - _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'") - - def __init__(self, lineno, cmd): - ParsingError.__init__(self, lineno) - self.cmd = cmd - - -class MissingSection(ParsingError): - """Raised when a section is required in a command but not present.""" - - _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s") - - def __init__(self, lineno, cmd, section): - ParsingError.__init__(self, lineno) - self.cmd = cmd - self.section = section - - -class BadFormat(ParsingError): - """Raised when a section is formatted incorrectly.""" - - _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in " - "command %(cmd)s: found '%(text)s'") - - def __init__(self, lineno, cmd, section, text): - ParsingError.__init__(self, lineno) - self.cmd = cmd - self.section = section - self.text = text - - -class InvalidTimezone(ParsingError): - """Raised when converting a string timezone to a seconds offset.""" - - _fmt = (_LOCATION_FMT + - "Timezone %(timezone)r could not be converted.%(reason)s") - - def __init__(self, lineno, timezone, reason=None): - ParsingError.__init__(self, lineno) - self.timezone = timezone - if reason: - self.reason = ' ' + reason - else: - self.reason = '' - - -class UnknownDateFormat(ImportError): - """Raised when an unknown date format is given.""" - - _fmt = ("Unknown date format '%(format)s'") - - def __init__(self, format): - ImportError.__init__(self) - self.format = format - - -class MissingHandler(ImportError): - """Raised when a processor can't handle a command.""" - - _fmt = ("Missing handler for command %(cmd)s") - - def __init__(self, cmd): - ImportError.__init__(self) - self.cmd = cmd - - -class 
UnknownParameter(ImportError): - """Raised when an unknown parameter is passed to a processor.""" - - _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s") - - def __init__(self, param, knowns): - ImportError.__init__(self) - self.param = param - self.knowns = knowns - - -class BadRepositorySize(ImportError): - """Raised when the repository has an incorrect number of revisions.""" - - _fmt = ("Bad repository size - %(found)d revisions found, " - "%(expected)d expected") - - def __init__(self, expected, found): - ImportError.__init__(self) - self.expected = expected - self.found = found - - -class BadRestart(ImportError): - """Raised when the import stream and id-map do not match up.""" - - _fmt = ("Bad restart - attempted to skip commit %(commit_id)s " - "but matching revision-id is unknown") - - def __init__(self, commit_id): - ImportError.__init__(self) - self.commit_id = commit_id
--- a/fastimport/hgechoprocessor.py Mon May 04 19:38:20 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Processor of import commands. - -This module provides core processing functionality including an abstract class -for basing real processors on. See the processors package for examples. 
-""" - - -import processor - -class HgEchoProcessor(processor.ImportProcessor): - - def __init__(self, ui, repo, **opts): - self.ui = ui - self.repo = repo - self.opts = opts - self.finished = False - - def progress_handler(self, cmd): - self.ui.write(cmd.dump_str(verbose=True) + "\n") - - def blob_handler(self, cmd): - self.ui.write(cmd.dump_str(verbose=True) + "\n") - - def checkpoint_handler(self, cmd): - self.ui.write(cmd.dump_str(verbose=True) + "\n") - - def commit_handler(self, cmd): - commit_handler = HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts) - commit_handler.process() - self.ui.write(cmd.dump_str(verbose=True) + "\n") - - def reset_handler(self, cmd): - self.ui.write(cmd.dump_str(verbose=True) + "\n") - - def tag_handler(self, cmd): - self.ui.write(cmd.dump_str(verbose=True) + "\n") - -class HgEchoCommitHandler(processor.CommitHandler): - - def __init__(self, command, ui, repo, **opts): - self.command = command - self.ui = ui - self.repo = repo - self.opts = opts - - def modify_handler(self, filecmd): - self.ui.write(filecmd.dump_str(verbose=True) + "\n") - - def delete_handler(self, filecmd): - self.ui.write(filecmd.dump_str(verbose=True) + "\n") - - def copy_handler(self, filecmd): - self.ui.write(filecmd.dump_str(verbose=True) + "\n") - - def rename_handler(self, filecmd): - self.ui.write(filecmd.dump_str(verbose=True) + "\n") - - def deleteall_handler(self, filecmd): - self.ui.write(filecmd.dump_str(verbose=True) + "\n")
--- a/fastimport/hgimport.py Mon May 04 19:38:20 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,227 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Processor of import commands. - -This module provides core processing functionality including an abstract class -for basing real processors on. See the processors package for examples. -""" - -import os -import os.path -import errno -import shutil - -import mercurial.hg -import mercurial.commands -from mercurial import util -from mercurial.node import nullrev -import processor - -import hgechoprocessor - -class HgImportProcessor(processor.ImportProcessor): - - def __init__(self, ui, repo, **opts): - self.ui = ui - self.repo = repo - self.opts = opts - self.last_mark = None - self.mark_map = {} - self.branch_map = {} - #self.tag_map = {} - #self.tag_back_map = {} - self.finished = False - - self.numblobs = 0 # for progress reporting - self.blobdir = None - - def teardown(self): - if self.blobdir and os.path.exists(self.blobdir): - self.ui.status("Removing blob dir %r ...\n" % self.blobdir) - shutil.rmtree(self.blobdir) - - def progress_handler(self, cmd): - self.ui.write("Progress: %s\n" % cmd.message) - - def blob_handler(self, cmd): - if self.blobdir is None: # no blobs seen yet - # XXX cleanup? 
- self.blobdir = os.path.join(self.repo.root, ".hg", "blobs") - os.mkdir(self.blobdir) - - fn = self.getblobfilename(cmd.id) - blobfile = open(fn, "wb") - #self.ui.debug("writing blob %s to %s (%d bytes)\n" - # % (cmd.id, fn, len(cmd.data))) - blobfile.write(cmd.data) - blobfile.close() - - self.numblobs += 1 - if self.numblobs % 500 == 0: - self.ui.status("%d blobs read\n" % self.numblobs) - - def getblobfilename(self, blobid): - if self.blobdir is None: - raise RuntimeError("no blobs seen, so no blob directory created") - # XXX should escape ":" for windows - return os.path.join(self.blobdir, "blob-" + blobid) - - def checkpoint_handler(self, cmd): - # This command means nothing to us - pass - - def committish_rev(self, committish): - if committish.startswith(":"): - return self.mark_map[committish] - else: - return self.branch_map[committish] - - def commit_handler(self, cmd): - if cmd.ref == "refs/heads/TAG.FIXUP": - #self.tag_back_map[cmd.mark] == first_parent - commit_handler = hgechoprocessor.HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts) - commit_handler.process() - return - if cmd.from_: - first_parent = self.committish_rev(cmd.from_) - else: - first_parent = self.branch_map.get(cmd.ref, nullrev) - #self.ui.write("First parent: %s\n" % first_parent) - # Update to the first parent - mercurial.hg.clean(self.repo, self.repo.lookup(first_parent)) - #self.ui.write("Bing\n") - if cmd.parents: - #self.ui.write("foo") - if len(cmd.parents) > 1: - raise NotImplementedError("Can't handle more than two parents") - second_parent = self.committish_rev(cmd.parents[0]) - #self.ui.write("Second parent: %s\n" % second_parent) - mercurial.commands.debugsetparents(self.ui, self.repo, - first_parent, second_parent) - #self.ui.write("Bing\n") - if cmd.ref == "refs/heads/master": - branch = "default" - else: - branch = cmd.ref[len("refs/heads/"):] - #self.ui.write("Branch: %s\n" % branch) - self.repo.dirstate.setbranch(branch) - #self.ui.write("Bing\n") - #print 
"vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv" - commit_handler = HgImportCommitHandler( - self, cmd, self.ui, self.repo, **self.opts) - commit_handler.process() - #print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - #self.ui.write(cmd.dump_str(verbose=True)) - - # in case we are converting from git or bzr, prefer author but - # fallback to committer (committer is required, author is - # optional) - userinfo = cmd.author or cmd.committer - user = "%s <%s>" % (userinfo[0], userinfo[1]) - node = self.repo.rawcommit(files = commit_handler.filelist(), - text = cmd.message, - user = user, - date = self.convert_date(userinfo)) - rev = self.repo.changelog.rev(node) - if cmd.mark is not None: - self.mark_map[":" + cmd.mark] = rev - self.branch_map[cmd.ref] = rev - self.ui.write("Done commit of rev %d\n" % rev) - #self.ui.write("%s\n" % self.mark_map) - - def convert_date(self, c): - res = (int(c[2]), int(c[3])) - #print c, res - #print type((0, 0)), type(res), len(res), type(res) is type((0, 0)) - #if type(res) is type((0, 0)) and len(res) == 2: - # print "go for it" - #return res - return "%d %d" % res - - def reset_handler(self, cmd): - if cmd.from_ is not None: - self.branch_map[cmd.ref] = self.committish_rev(cmd.from_) - - def tag_handler(self, cmd): - # self.tag_map[cmd.id] = self.tag_back_map[cmd.from_] - pass - -class HgImportCommitHandler(processor.CommitHandler): - - def __init__(self, parent, command, ui, repo, **opts): - self.parent = parent # HgImportProcessor running the show - self.command = command - self.ui = ui - self.repo = repo - self.opts = opts - self.files = set() - - def _make_container(self, path): - if '/' in path: - d = os.path.dirname(path) - if not os.path.isdir(d): - os.makedirs(d) - - def modify_handler(self, filecmd): - #print "============================" + filecmd.path - # FIXME: handle mode - self.files.add(filecmd.path) - fullpath = os.path.join(self.repo.root, filecmd.path) - self._make_container(fullpath) - #print "made dirs, 
writing file" - if filecmd.dataref: - # reference to a blob that has already appeared in the stream - fn = self.parent.getblobfilename(filecmd.dataref) - if os.path.exists(fullpath): - os.remove(fullpath) - try: - os.link(fn, fullpath) - except OSError, err: - if err.errno == errno.ENOENT: - # if this happens, it's a problem in the fast-import - # stream - raise util.Abort("bad blob ref %r (no such file %s)" - % (filecmd.dataref, fn)) - else: - # anything else is a bug in this extension - # (cross-device move, permissions, etc.) - raise - elif filecmd.data: - f = open(fullpath, "w") - f.write(filecmd.data) - f.close() - else: - raise RuntimeError("either filecmd.dataref or filecmd.data must be set") - #print self.repo.add([filecmd.path]) - #print "Done:", filecmd.path - - def delete_handler(self, filecmd): - self.files.add(filecmd.path) - self.repo.remove([filecmd.path], unlink=True) - - #def copy_handler(self, filecmd): - # self.files.add(filecmd.path) - # """Handle a filecopy command.""" - # self.ui.write("Cmd: %s\n" % repr(filecmd)) - - #def rename_handler(self, filecmd): - # self.files.add(filecmd.path) - # """Handle a filerename command.""" - # self.ui.write("Cmd: %s\n" % repr(filecmd)) - - def filelist(self): - return list(self.files)
--- a/fastimport/parser.py Mon May 04 19:38:20 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,507 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Parser of import data into command objects. - -In order to reuse existing front-ends, the stream format is a subset of -the one used by git-fast-import (as of the 1.5.4 release of git at least). -The grammar is: - - stream ::= cmd*; - - cmd ::= new_blob - | new_commit - | new_tag - | reset_branch - | checkpoint - | progress - ; - - new_blob ::= 'blob' lf - mark? - file_content; - file_content ::= data; - - new_commit ::= 'commit' sp ref_str lf - mark? - ('author' sp name '<' email '>' when lf)? - 'committer' sp name '<' email '>' when lf - commit_msg - ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? 
- ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)* - file_change* - lf?; - commit_msg ::= data; - - file_change ::= file_clr - | file_del - | file_rnm - | file_cpy - | file_obm - | file_inm; - file_clr ::= 'deleteall' lf; - file_del ::= 'D' sp path_str lf; - file_rnm ::= 'R' sp path_str sp path_str lf; - file_cpy ::= 'C' sp path_str sp path_str lf; - file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; - file_inm ::= 'M' sp mode sp 'inline' sp path_str lf - data; - - new_tag ::= 'tag' sp tag_str lf - 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf - 'tagger' sp name '<' email '>' when lf - tag_msg; - tag_msg ::= data; - - reset_branch ::= 'reset' sp ref_str lf - ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? - lf?; - - checkpoint ::= 'checkpoint' lf - lf?; - - progress ::= 'progress' sp not_lf* lf - lf?; - - # note: the first idnum in a stream should be 1 and subsequent - # idnums should not have gaps between values as this will cause - # the stream parser to reserve space for the gapped values. An - # idnum can be updated in the future to a new object by issuing - # a new mark directive with the old idnum. - # - mark ::= 'mark' sp idnum lf; - data ::= (delimited_data | exact_data) - lf?; - - # note: delim may be any string but must not contain lf. - # data_line may contain any data but must not be exactly - # delim. - delimited_data ::= 'data' sp '<<' delim lf - (data_line lf)* - delim lf; - - # note: declen indicates the length of binary_data in bytes. - # declen does not include the lf preceeding the binary data. - # - exact_data ::= 'data' sp declen lf - binary_data; - - # note: quoted strings are C-style quoting supporting \c for - # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn - # is the signed byte value in octal. Note that the only - # characters which must actually be escaped to protect the - # stream formatting is: \, \" and LF. Otherwise these values - # are UTF8. 
- # - ref_str ::= ref; - sha1exp_str ::= sha1exp; - tag_str ::= tag; - path_str ::= path | '"' quoted(path) '"' ; - mode ::= '100644' | '644' - | '100755' | '755' - | '120000' - ; - - declen ::= # unsigned 32 bit value, ascii base10 notation; - bigint ::= # unsigned integer value, ascii base10 notation; - binary_data ::= # file content, not interpreted; - - when ::= raw_when | rfc2822_when; - raw_when ::= ts sp tz; - rfc2822_when ::= # Valid RFC 2822 date and time; - - sp ::= # ASCII space character; - lf ::= # ASCII newline (LF) character; - - # note: a colon (':') must precede the numerical value assigned to - # an idnum. This is to distinguish it from a ref or tag name as - # GIT does not permit ':' in ref or tag strings. - # - idnum ::= ':' bigint; - path ::= # GIT style file path, e.g. "a/b/c"; - ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT"; - tag ::= # GIT tag name, e.g. "FIREFOX_1_5"; - sha1exp ::= # Any valid GIT SHA1 expression; - hexsha1 ::= # SHA1 in hexadecimal format; - - # note: name and email are UTF8 strings, however name must not - # contain '<' or lf and email must not contain any of the - # following: '<', '>', lf. - # - name ::= # valid GIT author/committer name; - email ::= # valid GIT author/committer email; - ts ::= # time since the epoch in seconds, ascii base10 notation; - tz ::= # GIT style timezone; - - # note: comments may appear anywhere in the input, except - # within a data command. Any form of the data command - # always escapes the related input from comment processing. - # - # In case it is not clear, the '#' that starts the comment - # must be the first character on that the line (an lf have - # preceeded it). - # - comment ::= '#' not_lf* lf; - not_lf ::= # Any byte that is not ASCII newline (LF); -""" - - -import re -import sys - -import commands -import dates -import errors - - -## Stream parsing ## - -class LineBasedParser(object): - - def __init__(self, input): - """A Parser that keeps track of line numbers. 
- - :param input: the file-like object to read from - """ - self.input = input - self.lineno = 0 - # Lines pushed back onto the input stream - self._buffer = [] - - def abort(self, exception, *args): - """Raise an exception providing line number information.""" - raise exception(self.lineno, *args) - - def readline(self): - """Get the next line including the newline or '' on EOF.""" - self.lineno += 1 - if self._buffer: - return self._buffer.pop() - else: - return self.input.readline() - - def next_line(self): - """Get the next line without the newline or None on EOF.""" - line = self.readline() - if line: - return line[:-1] - else: - return None - - def push_line(self, line): - """Push line back onto the line buffer. - - :param line: the line with no trailing newline - """ - self.lineno -= 1 - self._buffer.append(line + "\n") - - def read_bytes(self, count): - """Read a given number of bytes from the input stream. - - Throws MissingBytes if the bytes are not found. - - Note: This method does not read from the line buffer. - - :return: a string - """ - lines = [] - left = count - found = 0 - while left > 0: - line = self.input.readline(left) - if line: - line_len = len(line) - left -= line_len - found += line_len - lines.append(line) - if line.endswith('\n'): - self.lineno += 1 - else: - left = 0 - if found != count: - self.abort(errors.MissingBytes, count, found) - return ''.join(lines) - - def read_until(self, terminator): - """Read the input stream until the terminator is found. - - Throws MissingTerminator if the terminator is not found. - - Note: This method does not read from the line buffer. - - :return: the bytes read up to but excluding the terminator. - """ - raise NotImplementedError(self.read_until) - - -# Regular expression used for parsing. (Note: The spec states that the name -# part should be non-empty, but git-fast-export doesn't always do that.) 
-_WHO_AND_WHEN_RE = re.compile(r'([^\<\n]+) <([^\>\n]+)> (.+)') - - -class ImportParser(LineBasedParser): - - def __init__(self, input, verbose=False, output=sys.stdout): - """A Parser of import commands. - - :param input: the file-like object to read from - :param verbose: display extra information of not - :param output: the file-like object to write messages to (YAGNI?) - """ - LineBasedParser.__init__(self, input) - self.verbose = verbose - self.output = output - # We auto-detect the date format when a date is first encountered - self.date_parser = None - - def iter_commands(self): - """Iterator returning ImportCommand objects.""" - while True: - line = self.next_line() - if line is None: - break - elif len(line) == 0 or line.startswith('#'): - continue - # Search for commands in order of likelihood - elif line.startswith('commit '): - yield self._parse_commit(line[len('commit '):]) - elif line.startswith('blob'): - yield self._parse_blob() - elif line.startswith('progress '): - yield commands.ProgressCommand(line[len('progress '):]) - elif line.startswith('reset '): - yield self._parse_reset(line[len('reset '):]) - elif line.startswith('tag '): - yield self._parse_tag(line[len('tag '):]) - elif line.startswith('checkpoint'): - yield commands.CheckpointCommand() - else: - print line - self.abort(errors.InvalidCommand, line) - - def iter_file_commands(self): - """Iterator returning FileCommand objects. - - If an invalid file command is found, the line is silently - pushed back and iteration ends. 
- """ - while True: - line = self.next_line() - if line is None: - break - elif len(line) == 0 or line.startswith('#'): - continue - # Search for file commands in order of likelihood - elif line.startswith('M '): - yield self._parse_file_modify(line[2:]) - elif line.startswith('D '): - path = self._path(line[2:]) - yield commands.FileDeleteCommand(path) - elif line.startswith('R '): - old, new = self._path_pair(line[2:]) - yield commands.FileRenameCommand(old, new) - elif line.startswith('C '): - src, dest = self._path_pair(line[2:]) - yield commands.FileRenameCommand(src, dest) - elif line.startswith('deleteall'): - yield commands.FileDeleteAllCommand() - else: - self.push_line(line) - break - - def _parse_blob(self): - """Parse a blob command.""" - lineno = self.lineno - mark = self._get_mark_if_any() - data = self._get_data('blob') - return commands.BlobCommand(mark, data, lineno) - - def _parse_commit(self, ref): - """Parse a commit command.""" - lineno = self.lineno - mark = self._get_mark_if_any() - author = self._get_user_info('commit', 'author', False) - committer = self._get_user_info('commit', 'committer') - message = self._get_data('commit', 'message') - from_ = self._get_from() - parents = [] - while True: - merge = self._get_merge() - if merge is not None: - parents.append(merge) - else: - break - return commands.CommitCommand(ref, mark, author, committer, message, from_, - parents, self.iter_file_commands, lineno) - - def _parse_file_modify(self, info): - """Parse a filemodify command within a commit. - - :param info: a string in the format "mode dataref path" - (where dataref might be the hard-coded literal 'inline'). 
- """ - params = info.split(' ', 2) - path = self._path(params[2]) - is_executable, is_symlink = self._mode(params[0]) - if is_symlink: - kind = commands.SYMLINK_KIND - else: - kind = commands.FILE_KIND - if params[1] == 'inline': - dataref = None - data = self._get_data('filemodify') - else: - dataref = params[1] - data = None - return commands.FileModifyCommand(path, kind, is_executable, dataref, - data) - - def _parse_reset(self, ref): - """Parse a reset command.""" - from_ = self._get_from() - return commands.ResetCommand(ref, from_) - - def _parse_tag(self, name): - """Parse a tag command.""" - from_ = self._get_from('tag') - tagger = self._get_user_info('tag', 'tagger') - message = self._get_data('tag', 'message') - return commands.TagCommand(name, from_, tagger, message) - - def _get_mark_if_any(self): - """Parse a mark section.""" - line = self.next_line() - if line.startswith('mark :'): - return line[len('mark :'):] - else: - self.push_line(line) - return None - - def _get_from(self, required_for=None): - """Parse a from section.""" - line = self.next_line() - if line.startswith('from '): - return line[len('from '):] - elif required_for: - self.abort(errors.MissingSection, required_for, 'from') - else: - self.push_line(line) - return None - - def _get_merge(self): - """Parse a merge section.""" - line = self.next_line() - if line is None: # EOF after last "merge" line - return None - elif line.startswith('merge '): - return line[len('merge '):] - else: - #print "not a merge:", line - self.push_line(line) - return None - - def _get_user_info(self, cmd, section, required=True): - """Parse a user section.""" - line = self.next_line() - if line.startswith(section + ' '): - return self._who_when(line[len(section + ' '):], cmd, section) - elif required: - self.abort(errors.MissingSection, cmd, section) - else: - self.push_line(line) - return None - - def _get_data(self, required_for, section='data'): - """Parse a data section.""" - line = self.next_line() - if 
line.startswith('data '): - rest = line[len('data '):] - if rest.startswith('<<'): - return self.read_until(rest[2:]) - else: - size = int(rest) - result = self.read_bytes(size) - # optional LF after data. - next = self.input.readline() - self.lineno += 1 - if len(next) > 1 or next != "\n": - self.push_line(next[:-1]) - return result - else: - self.abort(errors.MissingSection, required_for, section) - - def _who_when(self, s, cmd, section): - """Parse who and when information from a string. - - :return: a tuple of (name,email,timestamp,timezone) - """ - match = _WHO_AND_WHEN_RE.search(s) - if match: - datestr = match.group(3) - if self.date_parser is None: - # auto-detect the date format - if len(datestr.split(' ')) == 2: - format = 'raw' - elif datestr == 'now': - format = 'now' - else: - format = 'rfc2822' - self.date_parser = dates.DATE_PARSERS_BY_NAME[format] - when = self.date_parser(datestr) - return (match.group(1),match.group(2),when[0],when[1]) - else: - self.abort(errors.BadFormat, cmd, section, s) - - def _path(self, s): - """Parse a path.""" - if s.startswith('"'): - if s[-1] != '"': - self.abort(errors.BadFormat) - else: - s = _unquote_c_string(s[1:-1]) - # Check path for sanity - sp = s.split("/") - if "" in sp or ".." in sp: - self.abort(errors.BadFormat) - return s - - def _path_pair(self, s): - """Parse two paths separated by a space.""" - # TODO: handle a space in the first path - parts = s.split(' ', 1) - return map(_unquote_c_string, parts) - - def _mode(self, s): - """Parse a file mode into executable and symlink flags. 
- - :return (is_executable, is_symlink) - """ - # Note: Output from git-fast-export slightly different to spec - if s in ['644', '100644', '0100644']: - return False, False - elif s in ['755', '100755', '0100755']: - return True, False - elif s in ['120000', '0120000']: - return False, True - else: - self.abort(errors.BadFormat, 'filemodify', 'mode', s) - - -def _unquote_c_string(s): - """replace C-style escape sequences (\n, \", etc.) with real chars.""" - # HACK: Python strings are close enough - return s.decode('string_escape', 'replace')
--- a/fastimport/processor.py Mon May 04 19:38:20 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,163 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Processor of import commands. - -This module provides core processing functionality including an abstract class -for basing real processors on. See the processors package for examples. -""" - - -#from bzrlib.errors import NotBranchError -import errors - - -class ImportProcessor(object): - """Base class for import processors. - - Subclasses should override the pre_*, post_* and *_handler - methods as appropriate. - """ - - # XXX this is useless now that we process multiple input streams: - # we only want to call setup() and teardown() once for all of them! - def process(self, command_iter): - """Process the stream of commands. - - :param command_iter: an iterator providing commands - """ - raise RuntimeError("hey! 
who's calling this?!?") - self.setup() - try: - self._process(command_iter) - finally: - self.teardown() - - def _process(self, command_iter): - self.pre_process() - for cmd in command_iter(): - #print cmd.dump_str(verbose=True) - #print "starting" - try: - #print cmd.name - handler = self.__class__.__dict__[cmd.name + "_handler"] - except KeyError: - raise errors.MissingHandler(cmd.name) - else: - self.pre_handler(cmd) - handler(self, cmd) - self.post_handler(cmd) - if self.finished: - break - #print "around again" - self.post_process() - - def setup(self): - pass - - def teardown(self): - pass - - def pre_process(self): - """Hook for logic at start of processing.""" - pass - - def post_process(self): - """Hook for logic at end of processing.""" - pass - - def pre_handler(self, cmd): - """Hook for logic before each handler starts.""" - pass - - def post_handler(self, cmd): - """Hook for logic after each handler finishes.""" - pass - - def progress_handler(self, cmd): - """Process a ProgressCommand.""" - raise NotImplementedError(self.progress_handler) - - def blob_handler(self, cmd): - """Process a BlobCommand.""" - raise NotImplementedError(self.blob_handler) - - def checkpoint_handler(self, cmd): - """Process a CheckpointCommand.""" - raise NotImplementedError(self.checkpoint_handler) - - def commit_handler(self, cmd): - """Process a CommitCommand.""" - raise NotImplementedError(self.commit_handler) - - def reset_handler(self, cmd): - """Process a ResetCommand.""" - raise NotImplementedError(self.reset_handler) - - def tag_handler(self, cmd): - """Process a TagCommand.""" - raise NotImplementedError(self.tag_handler) - - -class CommitHandler(object): - """Base class for commit handling. - - Subclasses should override the pre_*, post_* and *_handler - methods as appropriate. 
- """ - - def __init__(self, command): - self.command = command - - def process(self): - self.pre_process_files() - for fc in self.command.file_iter(): - #print fc.dump_str(verbose=True) - try: - handler = self.__class__.__dict__[fc.name[4:] + "_handler"] - except KeyError: - raise errors.MissingHandler(fc.name) - else: - handler(self, fc) - self.post_process_files() - - def pre_process_files(self): - """Prepare for committing.""" - pass - - def post_process_files(self): - """Save the revision.""" - pass - - def modify_handler(self, filecmd): - """Handle a filemodify command.""" - raise NotImplementedError(self.modify_handler) - - def delete_handler(self, filecmd): - """Handle a filedelete command.""" - raise NotImplementedError(self.delete_handler) - - def copy_handler(self, filecmd): - """Handle a filecopy command.""" - raise NotImplementedError(self.copy_handler) - - def rename_handler(self, filecmd): - """Handle a filerename command.""" - raise NotImplementedError(self.rename_handler) - - def deleteall_handler(self, filecmd): - """Handle a filedeleteall command.""" - raise NotImplementedError(self.deleteall_handler)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgfastimport/__init__.py Tue May 05 10:27:27 2009 -0400 @@ -0,0 +1,26 @@ +from mercurial import commands + +import parser +import hgechoprocessor +import hgimport + +def fastimport(ui, repo, *sources, **opts): + proc = hgimport.HgImportProcessor(ui, repo, **opts) + #proc = hgechoprocessor.HgEchoProcessor(ui, repo, **opts) + proc.setup() + try: + for source in sources: + ui.write("Reading source: %s\n" % source) + f = open(source) + p = parser.ImportParser(f) + proc._process(p.iter_commands) + f.close() + finally: + proc.teardown() + +cmdtable = { + "fastimport": + (fastimport, + [], + 'hg fastimport SOURCE ...') +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgfastimport/commands.py Tue May 05 10:27:27 2009 -0400 @@ -0,0 +1,216 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Import command classes.""" + + +# Lists of command names +COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag'] +FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename', + 'filedeleteall'] + +# Bazaar file kinds +FILE_KIND = 'file' +SYMLINK_KIND = 'symlink' + + +class ImportCommand(object): + """Base class for import commands.""" + + def __init__(self, name): + self.name = name + # List of field names not to display + self._binary = [] + + def __repr__(self): + return "<%s at %x: %s>" % (self.__class__.__name__, id(self), self) + + def __str__(self): + return self.name + + def dump_str(self, names=None, child_lists=None, verbose=False): + """Dump fields as a string. 
+ + :param names: the list of fields to include or + None for all public fields + :param child_lists: dictionary of child command names to + fields for that child command to include + :param verbose: if True, prefix each line with the command class and + display fields as a dictionary; if False, dump just the field + values with tabs between them + """ + interesting = {} + if names is None: + fields = [k for k in self.__dict__.keys() if not k.startswith('_')] + else: + fields = names + for field in fields: + value = self.__dict__.get(field) + if field in self._binary and value is not None: + value = '(...)' + interesting[field] = value + if verbose: + return "%s: %s" % (self.__class__.__name__, interesting) + else: + return "\t".join([str(interesting[k]) for k in fields]) + + +class BlobCommand(ImportCommand): + + def __init__(self, mark, data, lineno=0): + ImportCommand.__init__(self, 'blob') + self.mark = mark + self.data = data + self.lineno = lineno + # Provide a unique id in case the mark is missing + if mark is None: + self.id = '@%d' % lineno + else: + self.id = ':' + mark + self._binary = ['data'] + + def __str__(self): + return self.id + + +class CheckpointCommand(ImportCommand): + + def __init__(self): + ImportCommand.__init__(self, 'checkpoint') + + +class CommitCommand(ImportCommand): + + def __init__(self, ref, mark, author, committer, message, from_, + parents, file_iter, lineno=0): + ImportCommand.__init__(self, 'commit') + self.ref = ref + self.mark = mark + self.author = author + self.committer = committer + self.message = message + self.from_ = from_ + self.parents = parents + self.file_iter = file_iter + self.lineno = lineno + self._binary = ['file_iter'] + # Provide a unique id in case the mark is missing + if mark is None: + self.id = '@%d' % lineno + else: + self.id = ':' + mark + + def __str__(self): + return "ref %s, mark %s" % (self.ref, self.mark) + + def dump_str(self, names=None, child_lists=None, verbose=False): + result = 
[ImportCommand.dump_str(self, names, verbose=verbose)] + for f in self.file_iter(): + if child_lists is None: + continue + try: + child_names = child_lists[f.name] + except KeyError: + continue + result.append("\t%s" % f.dump_str(child_names, verbose=verbose)) + return '\n'.join(result) + + +class ProgressCommand(ImportCommand): + + def __init__(self, message): + ImportCommand.__init__(self, 'progress') + self.message = message + + +class ResetCommand(ImportCommand): + + def __init__(self, ref, from_): + ImportCommand.__init__(self, 'reset') + self.ref = ref + self.from_ = from_ + + +class TagCommand(ImportCommand): + + def __init__(self, id, from_, tagger, message): + ImportCommand.__init__(self, 'tag') + self.id = id + self.from_ = from_ + self.tagger = tagger + self.message = message + + def __str__(self): + return self.id + + +class FileCommand(ImportCommand): + """Base class for file commands.""" + pass + + +class FileModifyCommand(FileCommand): + + def __init__(self, path, kind, is_executable, dataref, data): + # Either dataref or data should be null + FileCommand.__init__(self, 'filemodify') + self.path = path + self.kind = kind + self.is_executable = is_executable + self.dataref = dataref + self.data = data + self._binary = ['data'] + + def __str__(self): + return self.path + + +class FileDeleteCommand(FileCommand): + + def __init__(self, path): + FileCommand.__init__(self, 'filedelete') + self.path = path + + def __str__(self): + return self.path + + +class FileCopyCommand(FileCommand): + + def __init__(self, src_path, dest_path): + FileCommand.__init__(self, 'filecopy') + self.src_path = src_path + self.dest_path = dest_path + + def __str__(self): + return "%s -> %s" % (self.src_path, self.dest_path) + + +class FileRenameCommand(FileCommand): + + def __init__(self, old_path, new_path): + FileCommand.__init__(self, 'filerename') + self.old_path = old_path + self.new_path = new_path + + def __str__(self): + return "%s -> %s" % (self.old_path, self.new_path) 
+ + +class FileDeleteAllCommand(FileCommand): + + def __init__(self): + FileCommand.__init__(self, 'filedeleteall')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgfastimport/dates.py Tue May 05 10:27:27 2009 -0400 @@ -0,0 +1,76 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Date parsing routines. + +Each routine returns timestamp,timezone where + +* timestamp is seconds since epoch +* timezone is the offset from UTC in seconds. +""" + + +import time + + +def parse_raw(s): + """Parse a date from a raw string. + + The format must be exactly "seconds-since-epoch offset-utc". + See the spec for details. + """ + timestamp_str, timezone_str = s.split(' ', 1) + timestamp = float(timestamp_str) + timezone = _parse_tz(timezone_str) + return timestamp, timezone + + +def _parse_tz(tz): + """Parse a timezone specification in the [+|-]HHMM format. + + :return: the timezone offset in seconds. + """ + # from git_repository.py in bzr-git + assert len(tz) == 5 + sign = {'+': +1, '-': -1}[tz[0]] + hours = int(tz[1:3]) + minutes = int(tz[3:]) + return sign * 60 * (60 * hours + minutes) + + +def parse_rfc2822(s): + """Parse a date from a rfc2822 string. + + See the spec for details. + """ + raise NotImplementedError(parse_rfc2822) + + +def parse_now(s): + """Parse a date from a string. + + The format must be exactly "now". + See the spec for details. 
+ """ + return time.time(), 0 + + +# Lookup tabel of date parsing routines +DATE_PARSERS_BY_NAME = { + 'raw': parse_raw, + 'rfc2822': parse_rfc2822, + 'now': parse_now, + }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgfastimport/errors.py Tue May 05 10:27:27 2009 -0400 @@ -0,0 +1,171 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Exception classes for fastimport""" + +#from bzrlib import errors as bzr_errors + + +# Prefix to messages to show location information +_LOCATION_FMT = "line %(lineno)d: " + +class FmtException(StandardError): + def __str__(self): + return repr(self) + + def __repr__(self): + return self._fmt % self.__dict__ + +class ImportError(FmtException): + """The base exception class for all import processing exceptions.""" + + _fmt = "Unknown Import Error" + + +class ParsingError(ImportError): + """The base exception class for all import processing exceptions.""" + + _fmt = _LOCATION_FMT + "Unknown Import Parsing Error" + + def __init__(self, lineno): + ImportError.__init__(self) + self.lineno = lineno + + +class MissingBytes(ParsingError): + """Raised when EOF encountered while expecting to find more bytes.""" + + _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes," + " found %(found)d") + + def __init__(self, lineno, expected, found): + ParsingError.__init__(self, lineno) + self.expected = expected + self.found = found + + +class MissingTerminator(ParsingError): + """Raised when EOF 
encountered while expecting to find a terminator.""" + + _fmt = (_LOCATION_FMT + + "Unexpected EOF - expected '%(terminator)s' terminator") + + def __init__(self, lineno, terminator): + ParsingError.__init__(self, lineno) + self.terminator = terminator + + +class InvalidCommand(ParsingError): + """Raised when an unknown command found.""" + + _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'") + + def __init__(self, lineno, cmd): + ParsingError.__init__(self, lineno) + self.cmd = cmd + + +class MissingSection(ParsingError): + """Raised when a section is required in a command but not present.""" + + _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s") + + def __init__(self, lineno, cmd, section): + ParsingError.__init__(self, lineno) + self.cmd = cmd + self.section = section + + +class BadFormat(ParsingError): + """Raised when a section is formatted incorrectly.""" + + _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in " + "command %(cmd)s: found '%(text)s'") + + def __init__(self, lineno, cmd, section, text): + ParsingError.__init__(self, lineno) + self.cmd = cmd + self.section = section + self.text = text + + +class InvalidTimezone(ParsingError): + """Raised when converting a string timezone to a seconds offset.""" + + _fmt = (_LOCATION_FMT + + "Timezone %(timezone)r could not be converted.%(reason)s") + + def __init__(self, lineno, timezone, reason=None): + ParsingError.__init__(self, lineno) + self.timezone = timezone + if reason: + self.reason = ' ' + reason + else: + self.reason = '' + + +class UnknownDateFormat(ImportError): + """Raised when an unknown date format is given.""" + + _fmt = ("Unknown date format '%(format)s'") + + def __init__(self, format): + ImportError.__init__(self) + self.format = format + + +class MissingHandler(ImportError): + """Raised when a processor can't handle a command.""" + + _fmt = ("Missing handler for command %(cmd)s") + + def __init__(self, cmd): + ImportError.__init__(self) + self.cmd = cmd + 
+ +class UnknownParameter(ImportError): + """Raised when an unknown parameter is passed to a processor.""" + + _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s") + + def __init__(self, param, knowns): + ImportError.__init__(self) + self.param = param + self.knowns = knowns + + +class BadRepositorySize(ImportError): + """Raised when the repository has an incorrect number of revisions.""" + + _fmt = ("Bad repository size - %(found)d revisions found, " + "%(expected)d expected") + + def __init__(self, expected, found): + ImportError.__init__(self) + self.expected = expected + self.found = found + + +class BadRestart(ImportError): + """Raised when the import stream and id-map do not match up.""" + + _fmt = ("Bad restart - attempted to skip commit %(commit_id)s " + "but matching revision-id is unknown") + + def __init__(self, commit_id): + ImportError.__init__(self) + self.commit_id = commit_id
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgfastimport/hgechoprocessor.py Tue May 05 10:27:27 2009 -0400 @@ -0,0 +1,75 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Processor of import commands. + +This module provides core processing functionality including an abstract class +for basing real processors on. See the processors package for examples. 
+""" + + +import processor + +class HgEchoProcessor(processor.ImportProcessor): + + def __init__(self, ui, repo, **opts): + self.ui = ui + self.repo = repo + self.opts = opts + self.finished = False + + def progress_handler(self, cmd): + self.ui.write(cmd.dump_str(verbose=True) + "\n") + + def blob_handler(self, cmd): + self.ui.write(cmd.dump_str(verbose=True) + "\n") + + def checkpoint_handler(self, cmd): + self.ui.write(cmd.dump_str(verbose=True) + "\n") + + def commit_handler(self, cmd): + commit_handler = HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts) + commit_handler.process() + self.ui.write(cmd.dump_str(verbose=True) + "\n") + + def reset_handler(self, cmd): + self.ui.write(cmd.dump_str(verbose=True) + "\n") + + def tag_handler(self, cmd): + self.ui.write(cmd.dump_str(verbose=True) + "\n") + +class HgEchoCommitHandler(processor.CommitHandler): + + def __init__(self, command, ui, repo, **opts): + self.command = command + self.ui = ui + self.repo = repo + self.opts = opts + + def modify_handler(self, filecmd): + self.ui.write(filecmd.dump_str(verbose=True) + "\n") + + def delete_handler(self, filecmd): + self.ui.write(filecmd.dump_str(verbose=True) + "\n") + + def copy_handler(self, filecmd): + self.ui.write(filecmd.dump_str(verbose=True) + "\n") + + def rename_handler(self, filecmd): + self.ui.write(filecmd.dump_str(verbose=True) + "\n") + + def deleteall_handler(self, filecmd): + self.ui.write(filecmd.dump_str(verbose=True) + "\n")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgfastimport/hgimport.py Tue May 05 10:27:27 2009 -0400 @@ -0,0 +1,227 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Processor of import commands. + +This module provides core processing functionality including an abstract class +for basing real processors on. See the processors package for examples. +""" + +import os +import os.path +import errno +import shutil + +import mercurial.hg +import mercurial.commands +from mercurial import util +from mercurial.node import nullrev +import processor + +import hgechoprocessor + +class HgImportProcessor(processor.ImportProcessor): + + def __init__(self, ui, repo, **opts): + self.ui = ui + self.repo = repo + self.opts = opts + self.last_mark = None + self.mark_map = {} + self.branch_map = {} + #self.tag_map = {} + #self.tag_back_map = {} + self.finished = False + + self.numblobs = 0 # for progress reporting + self.blobdir = None + + def teardown(self): + if self.blobdir and os.path.exists(self.blobdir): + self.ui.status("Removing blob dir %r ...\n" % self.blobdir) + shutil.rmtree(self.blobdir) + + def progress_handler(self, cmd): + self.ui.write("Progress: %s\n" % cmd.message) + + def blob_handler(self, cmd): + if self.blobdir is None: # no blobs seen yet + # XXX cleanup? 
+ self.blobdir = os.path.join(self.repo.root, ".hg", "blobs") + os.mkdir(self.blobdir) + + fn = self.getblobfilename(cmd.id) + blobfile = open(fn, "wb") + #self.ui.debug("writing blob %s to %s (%d bytes)\n" + # % (cmd.id, fn, len(cmd.data))) + blobfile.write(cmd.data) + blobfile.close() + + self.numblobs += 1 + if self.numblobs % 500 == 0: + self.ui.status("%d blobs read\n" % self.numblobs) + + def getblobfilename(self, blobid): + if self.blobdir is None: + raise RuntimeError("no blobs seen, so no blob directory created") + # XXX should escape ":" for windows + return os.path.join(self.blobdir, "blob-" + blobid) + + def checkpoint_handler(self, cmd): + # This command means nothing to us + pass + + def committish_rev(self, committish): + if committish.startswith(":"): + return self.mark_map[committish] + else: + return self.branch_map[committish] + + def commit_handler(self, cmd): + if cmd.ref == "refs/heads/TAG.FIXUP": + #self.tag_back_map[cmd.mark] == first_parent + commit_handler = hgechoprocessor.HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts) + commit_handler.process() + return + if cmd.from_: + first_parent = self.committish_rev(cmd.from_) + else: + first_parent = self.branch_map.get(cmd.ref, nullrev) + #self.ui.write("First parent: %s\n" % first_parent) + # Update to the first parent + mercurial.hg.clean(self.repo, self.repo.lookup(first_parent)) + #self.ui.write("Bing\n") + if cmd.parents: + #self.ui.write("foo") + if len(cmd.parents) > 1: + raise NotImplementedError("Can't handle more than two parents") + second_parent = self.committish_rev(cmd.parents[0]) + #self.ui.write("Second parent: %s\n" % second_parent) + mercurial.commands.debugsetparents(self.ui, self.repo, + first_parent, second_parent) + #self.ui.write("Bing\n") + if cmd.ref == "refs/heads/master": + branch = "default" + else: + branch = cmd.ref[len("refs/heads/"):] + #self.ui.write("Branch: %s\n" % branch) + self.repo.dirstate.setbranch(branch) + #self.ui.write("Bing\n") + #print 
"vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv" + commit_handler = HgImportCommitHandler( + self, cmd, self.ui, self.repo, **self.opts) + commit_handler.process() + #print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + #self.ui.write(cmd.dump_str(verbose=True)) + + # in case we are converting from git or bzr, prefer author but + # fallback to committer (committer is required, author is + # optional) + userinfo = cmd.author or cmd.committer + user = "%s <%s>" % (userinfo[0], userinfo[1]) + node = self.repo.rawcommit(files = commit_handler.filelist(), + text = cmd.message, + user = user, + date = self.convert_date(userinfo)) + rev = self.repo.changelog.rev(node) + if cmd.mark is not None: + self.mark_map[":" + cmd.mark] = rev + self.branch_map[cmd.ref] = rev + self.ui.write("Done commit of rev %d\n" % rev) + #self.ui.write("%s\n" % self.mark_map) + + def convert_date(self, c): + res = (int(c[2]), int(c[3])) + #print c, res + #print type((0, 0)), type(res), len(res), type(res) is type((0, 0)) + #if type(res) is type((0, 0)) and len(res) == 2: + # print "go for it" + #return res + return "%d %d" % res + + def reset_handler(self, cmd): + if cmd.from_ is not None: + self.branch_map[cmd.ref] = self.committish_rev(cmd.from_) + + def tag_handler(self, cmd): + # self.tag_map[cmd.id] = self.tag_back_map[cmd.from_] + pass + +class HgImportCommitHandler(processor.CommitHandler): + + def __init__(self, parent, command, ui, repo, **opts): + self.parent = parent # HgImportProcessor running the show + self.command = command + self.ui = ui + self.repo = repo + self.opts = opts + self.files = set() + + def _make_container(self, path): + if '/' in path: + d = os.path.dirname(path) + if not os.path.isdir(d): + os.makedirs(d) + + def modify_handler(self, filecmd): + #print "============================" + filecmd.path + # FIXME: handle mode + self.files.add(filecmd.path) + fullpath = os.path.join(self.repo.root, filecmd.path) + self._make_container(fullpath) + #print "made dirs, 
writing file" + if filecmd.dataref: + # reference to a blob that has already appeared in the stream + fn = self.parent.getblobfilename(filecmd.dataref) + if os.path.exists(fullpath): + os.remove(fullpath) + try: + os.link(fn, fullpath) + except OSError, err: + if err.errno == errno.ENOENT: + # if this happens, it's a problem in the fast-import + # stream + raise util.Abort("bad blob ref %r (no such file %s)" + % (filecmd.dataref, fn)) + else: + # anything else is a bug in this extension + # (cross-device move, permissions, etc.) + raise + elif filecmd.data: + f = open(fullpath, "w") + f.write(filecmd.data) + f.close() + else: + raise RuntimeError("either filecmd.dataref or filecmd.data must be set") + #print self.repo.add([filecmd.path]) + #print "Done:", filecmd.path + + def delete_handler(self, filecmd): + self.files.add(filecmd.path) + self.repo.remove([filecmd.path], unlink=True) + + #def copy_handler(self, filecmd): + # self.files.add(filecmd.path) + # """Handle a filecopy command.""" + # self.ui.write("Cmd: %s\n" % repr(filecmd)) + + #def rename_handler(self, filecmd): + # self.files.add(filecmd.path) + # """Handle a filerename command.""" + # self.ui.write("Cmd: %s\n" % repr(filecmd)) + + def filelist(self): + return list(self.files)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgfastimport/parser.py Tue May 05 10:27:27 2009 -0400 @@ -0,0 +1,507 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Parser of import data into command objects. + +In order to reuse existing front-ends, the stream format is a subset of +the one used by git-fast-import (as of the 1.5.4 release of git at least). +The grammar is: + + stream ::= cmd*; + + cmd ::= new_blob + | new_commit + | new_tag + | reset_branch + | checkpoint + | progress + ; + + new_blob ::= 'blob' lf + mark? + file_content; + file_content ::= data; + + new_commit ::= 'commit' sp ref_str lf + mark? + ('author' sp name '<' email '>' when lf)? + 'committer' sp name '<' email '>' when lf + commit_msg + ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? 
+ ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)* + file_change* + lf?; + commit_msg ::= data; + + file_change ::= file_clr + | file_del + | file_rnm + | file_cpy + | file_obm + | file_inm; + file_clr ::= 'deleteall' lf; + file_del ::= 'D' sp path_str lf; + file_rnm ::= 'R' sp path_str sp path_str lf; + file_cpy ::= 'C' sp path_str sp path_str lf; + file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; + file_inm ::= 'M' sp mode sp 'inline' sp path_str lf + data; + + new_tag ::= 'tag' sp tag_str lf + 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf + 'tagger' sp name '<' email '>' when lf + tag_msg; + tag_msg ::= data; + + reset_branch ::= 'reset' sp ref_str lf + ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? + lf?; + + checkpoint ::= 'checkpoint' lf + lf?; + + progress ::= 'progress' sp not_lf* lf + lf?; + + # note: the first idnum in a stream should be 1 and subsequent + # idnums should not have gaps between values as this will cause + # the stream parser to reserve space for the gapped values. An + # idnum can be updated in the future to a new object by issuing + # a new mark directive with the old idnum. + # + mark ::= 'mark' sp idnum lf; + data ::= (delimited_data | exact_data) + lf?; + + # note: delim may be any string but must not contain lf. + # data_line may contain any data but must not be exactly + # delim. + delimited_data ::= 'data' sp '<<' delim lf + (data_line lf)* + delim lf; + + # note: declen indicates the length of binary_data in bytes. + # declen does not include the lf preceding the binary data. + # + exact_data ::= 'data' sp declen lf + binary_data; + + # note: quoted strings are C-style quoting supporting \c for + # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn + # is the signed byte value in octal. Note that the only + # characters which must actually be escaped to protect the + # stream formatting are: \, \" and LF. Otherwise these values + # are UTF8. 
+ # + ref_str ::= ref; + sha1exp_str ::= sha1exp; + tag_str ::= tag; + path_str ::= path | '"' quoted(path) '"' ; + mode ::= '100644' | '644' + | '100755' | '755' + | '120000' + ; + + declen ::= # unsigned 32 bit value, ascii base10 notation; + bigint ::= # unsigned integer value, ascii base10 notation; + binary_data ::= # file content, not interpreted; + + when ::= raw_when | rfc2822_when; + raw_when ::= ts sp tz; + rfc2822_when ::= # Valid RFC 2822 date and time; + + sp ::= # ASCII space character; + lf ::= # ASCII newline (LF) character; + + # note: a colon (':') must precede the numerical value assigned to + # an idnum. This is to distinguish it from a ref or tag name as + # GIT does not permit ':' in ref or tag strings. + # + idnum ::= ':' bigint; + path ::= # GIT style file path, e.g. "a/b/c"; + ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT"; + tag ::= # GIT tag name, e.g. "FIREFOX_1_5"; + sha1exp ::= # Any valid GIT SHA1 expression; + hexsha1 ::= # SHA1 in hexadecimal format; + + # note: name and email are UTF8 strings, however name must not + # contain '<' or lf and email must not contain any of the + # following: '<', '>', lf. + # + name ::= # valid GIT author/committer name; + email ::= # valid GIT author/committer email; + ts ::= # time since the epoch in seconds, ascii base10 notation; + tz ::= # GIT style timezone; + + # note: comments may appear anywhere in the input, except + # within a data command. Any form of the data command + # always escapes the related input from comment processing. + # + # In case it is not clear, the '#' that starts the comment + # must be the first character on that the line (an lf have + # preceeded it). + # + comment ::= '#' not_lf* lf; + not_lf ::= # Any byte that is not ASCII newline (LF); +""" + + +import re +import sys + +import commands +import dates +import errors + + +## Stream parsing ## + +class LineBasedParser(object): + + def __init__(self, input): + """A Parser that keeps track of line numbers. 
+ + :param input: the file-like object to read from + """ + self.input = input + self.lineno = 0 + # Lines pushed back onto the input stream + self._buffer = [] + + def abort(self, exception, *args): + """Raise an exception providing line number information.""" + raise exception(self.lineno, *args) + + def readline(self): + """Get the next line including the newline or '' on EOF.""" + self.lineno += 1 + if self._buffer: + return self._buffer.pop() + else: + return self.input.readline() + + def next_line(self): + """Get the next line without the newline or None on EOF.""" + line = self.readline() + if line: + return line[:-1] + else: + return None + + def push_line(self, line): + """Push line back onto the line buffer. + + :param line: the line with no trailing newline + """ + self.lineno -= 1 + self._buffer.append(line + "\n") + + def read_bytes(self, count): + """Read a given number of bytes from the input stream. + + Throws MissingBytes if the bytes are not found. + + Note: This method does not read from the line buffer. + + :return: a string + """ + lines = [] + left = count + found = 0 + while left > 0: + line = self.input.readline(left) + if line: + line_len = len(line) + left -= line_len + found += line_len + lines.append(line) + if line.endswith('\n'): + self.lineno += 1 + else: + left = 0 + if found != count: + self.abort(errors.MissingBytes, count, found) + return ''.join(lines) + + def read_until(self, terminator): + """Read the input stream until the terminator is found. + + Throws MissingTerminator if the terminator is not found. + + Note: This method does not read from the line buffer. + + :return: the bytes read up to but excluding the terminator. + """ + raise NotImplementedError(self.read_until) + + +# Regular expression used for parsing. (Note: The spec states that the name +# part should be non-empty, but git-fast-export doesn't always do that.) 
_WHO_AND_WHEN_RE = re.compile(r'([^\<\n]+) <([^\>\n]+)> (.+)')


class ImportParser(LineBasedParser):
    """Parse a fast-import stream into command objects.

    iter_commands() yields one object from the ``commands`` module per
    top-level command in the stream.  The file changes that belong to a
    commit are parsed lazily: each CommitCommand is handed the bound
    method iter_file_commands and reads its file commands from the same
    underlying stream when that method is called.
    """

    def __init__(self, input, verbose=False, output=sys.stdout):
        """A Parser of import commands.

        :param input: the file-like object to read from
        :param verbose: display extra information or not
        :param output: the file-like object to write messages to (YAGNI?)
        """
        LineBasedParser.__init__(self, input)
        self.verbose = verbose
        self.output = output
        # We auto-detect the date format when a date is first encountered
        self.date_parser = None

    def iter_commands(self):
        """Iterator returning ImportCommand objects.

        Blank lines and comment lines (starting with '#') are skipped.
        Aborts with errors.InvalidCommand on an unrecognised line.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for commands in order of likelihood
            elif line.startswith('commit '):
                yield self._parse_commit(line[len('commit '):])
            elif line.startswith('blob'):
                yield self._parse_blob()
            elif line.startswith('progress '):
                yield commands.ProgressCommand(line[len('progress '):])
            elif line.startswith('reset '):
                yield self._parse_reset(line[len('reset '):])
            elif line.startswith('tag '):
                yield self._parse_tag(line[len('tag '):])
            elif line.startswith('checkpoint'):
                yield commands.CheckpointCommand()
            else:
                # Echo the offending line on the configured message stream
                # (sys.stdout by default) before aborting.
                self.output.write("%s\n" % line)
                self.abort(errors.InvalidCommand, line)

    def iter_file_commands(self):
        """Iterator returning FileCommand objects.

        If an invalid file command is found, the line is silently
        pushed back and iteration ends.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for file commands in order of likelihood
            elif line.startswith('M '):
                yield self._parse_file_modify(line[2:])
            elif line.startswith('D '):
                path = self._path(line[2:], 'filedelete')
                yield commands.FileDeleteCommand(path)
            elif line.startswith('R '):
                old, new = self._path_pair(line[2:], 'filerename')
                yield commands.FileRenameCommand(old, new)
            elif line.startswith('C '):
                src, dest = self._path_pair(line[2:], 'filecopy')
                # 'C' is a copy, not a rename: the source must survive.
                # NOTE(review): assumes commands.FileCopyCommand(src, dest)
                # exists alongside FileRenameCommand ('filecopy' is listed
                # in commands.FILE_COMMAND_NAMES) -- confirm.
                yield commands.FileCopyCommand(src, dest)
            elif line.startswith('deleteall'):
                yield commands.FileDeleteAllCommand()
            else:
                self.push_line(line)
                break

    def _unread(self, line):
        """Push *line* back onto the stream unless it is the EOF marker."""
        if line is not None:
            self.push_line(line)

    def _parse_blob(self):
        """Parse a blob command."""
        lineno = self.lineno
        mark = self._get_mark_if_any()
        data = self._get_data('blob')
        return commands.BlobCommand(mark, data, lineno)

    def _parse_commit(self, ref):
        """Parse a commit command.

        :param ref: the ref named on the 'commit' line
        """
        lineno = self.lineno
        mark = self._get_mark_if_any()
        author = self._get_user_info('commit', 'author', False)
        committer = self._get_user_info('commit', 'committer')
        message = self._get_data('commit', 'message')
        from_ = self._get_from()
        parents = []
        while True:
            merge = self._get_merge()
            if merge is None:
                break
            parents.append(merge)
        # The file commands are not consumed here: the command object gets
        # the bound iterator method and pulls them from the stream later.
        return commands.CommitCommand(ref, mark, author, committer, message,
                                      from_, parents,
                                      self.iter_file_commands, lineno)

    def _parse_file_modify(self, info):
        """Parse a filemodify command within a commit.

        :param info: a string in the format "mode dataref path"
          (where dataref might be the hard-coded literal 'inline').
        """
        params = info.split(' ', 2)
        if len(params) != 3:
            # Report a truncated 'M' line as a stream error rather than
            # letting an IndexError escape.
            self.abort(errors.BadFormat, 'filemodify', 'path', info)
        path = self._path(params[2], 'filemodify')
        is_executable, is_symlink = self._mode(params[0])
        if is_symlink:
            kind = commands.SYMLINK_KIND
        else:
            kind = commands.FILE_KIND
        if params[1] == 'inline':
            dataref = None
            data = self._get_data('filemodify')
        else:
            dataref = params[1]
            data = None
        return commands.FileModifyCommand(path, kind, is_executable, dataref,
                                          data)

    def _parse_reset(self, ref):
        """Parse a reset command."""
        from_ = self._get_from()
        return commands.ResetCommand(ref, from_)

    def _parse_tag(self, name):
        """Parse a tag command."""
        from_ = self._get_from('tag')
        tagger = self._get_user_info('tag', 'tagger')
        message = self._get_data('tag', 'message')
        return commands.TagCommand(name, from_, tagger, message)

    def _get_mark_if_any(self):
        """Parse a mark section.

        :return: the mark id without its leading ':', or None if the next
          line is not a mark (the line is pushed back) or the stream ended.
        """
        line = self.next_line()
        if line is not None and line.startswith('mark :'):
            return line[len('mark :'):]
        self._unread(line)
        return None

    def _get_from(self, required_for=None):
        """Parse a from section.

        :param required_for: name of the command that requires a 'from'
          line; when set, a missing section aborts with MissingSection.
        :return: the committish after 'from ', or None when the optional
          section is absent (including at end of stream).
        """
        line = self.next_line()
        if line is not None and line.startswith('from '):
            return line[len('from '):]
        if required_for:
            self.abort(errors.MissingSection, required_for, 'from')
        self._unread(line)
        return None

    def _get_merge(self):
        """Parse a merge section."""
        line = self.next_line()
        if line is None:  # EOF after last "merge" line
            return None
        elif line.startswith('merge '):
            return line[len('merge '):]
        else:
            self.push_line(line)
            return None

    def _get_user_info(self, cmd, section, required=True):
        """Parse a user section.

        :param cmd: name of the enclosing command, for error reporting
        :param section: the section keyword, e.g. 'author' or 'committer'
        :param required: abort with MissingSection if the section is absent
        :return: the tuple produced by _who_when(), or None when an
          optional section is absent
        """
        line = self.next_line()
        prefix = section + ' '
        if line is not None and line.startswith(prefix):
            return self._who_when(line[len(prefix):], cmd, section)
        if required:
            self.abort(errors.MissingSection, cmd, section)
        self._unread(line)
        return None

    def _get_data(self, required_for, section='data'):
        """Parse a data section.

        :param required_for: name of the enclosing command, for errors
        :param section: name to report if the section is missing
        :return: the data payload as a string
        """
        line = self.next_line()
        if line is None or not line.startswith('data '):
            self.abort(errors.MissingSection, required_for, section)
        rest = line[len('data '):]
        if rest.startswith('<<'):
            return self.read_until(rest[2:])
        size = int(rest)
        result = self.read_bytes(size)
        # optional LF after data.
        following = self.input.readline()
        self.lineno += 1
        if following != "\n":
            # Not the optional LF, so it belongs to whatever comes next.
            # Strip only a real newline, so an unterminated final line keeps
            # its last character.  At EOF this pushes back an empty line,
            # which the command iterators skip.
            if following.endswith("\n"):
                following = following[:-1]
            self.push_line(following)
        return result

    def _who_when(self, s, cmd, section):
        """Parse who and when information from a string.

        :return: a tuple of (name,email,timestamp,timezone)
        """
        match = _WHO_AND_WHEN_RE.search(s)
        if not match:
            self.abort(errors.BadFormat, cmd, section, s)
        datestr = match.group(3)
        if self.date_parser is None:
            # auto-detect the date format from the first date seen; every
            # later date in the stream is parsed with the same parser
            if len(datestr.split(' ')) == 2:
                date_format = 'raw'
            elif datestr == 'now':
                date_format = 'now'
            else:
                date_format = 'rfc2822'
            self.date_parser = dates.DATE_PARSERS_BY_NAME[date_format]
        when = self.date_parser(datestr)
        return (match.group(1), match.group(2), when[0], when[1])

    def _path(self, s, cmd='?'):
        """Parse a path.

        A path wrapped in double quotes is unquoted.  Paths with an empty
        component (leading, trailing or doubled '/') or a '..' component
        are rejected.

        :param s: the raw path text from the stream
        :param cmd: name of the enclosing command, used only in errors
        """
        if s.startswith('"'):
            if s[-1] != '"':
                # Pass cmd/section/text like the other BadFormat call sites.
                self.abort(errors.BadFormat, cmd, 'path', s)
            else:
                s = _unquote_c_string(s[1:-1])
        # Check path for sanity
        components = s.split("/")
        if "" in components or ".." in components:
            self.abort(errors.BadFormat, cmd, 'path', s)
        return s

    def _path_pair(self, s, cmd='?'):
        """Parse two paths separated by a space.

        :param s: the raw "src dest" text from the stream
        :param cmd: name of the enclosing command, used only in errors
        :return: a two-element list [first, second]
        """
        # TODO: handle a space in the first path
        parts = s.split(' ', 1)
        if len(parts) != 2:
            # Only one path given: report it as a stream error rather than
            # failing later when the caller unpacks the result.
            self.abort(errors.BadFormat, cmd, 'path', s)
        return [_unquote_c_string(part) for part in parts]

    def _mode(self, s):
        """Parse a file mode into executable and symlink flags.

        :return (is_executable, is_symlink)
        """
        # Note: Output from git-fast-export slightly different to spec
        if s in ['644', '100644', '0100644']:
            return False, False
        elif s in ['755', '100755', '0100755']:
            return True, False
        elif s in ['120000', '0120000']:
            return False, True
        else:
            self.abort(errors.BadFormat, 'filemodify', 'mode', s)


def _unquote_c_string(s):
    r"""replace C-style escape sequences (\n, \", etc.) with real chars."""
    # HACK: Python strings are close enough
    # (the 'string_escape' codec only exists on Python 2 byte strings)
    return s.decode('string_escape', 'replace')
# hgfastimport/processor.py
#
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Processor of import commands.

This module provides core processing functionality including an abstract class
for basing real processors on.  See the processors package for examples.
"""


#from bzrlib.errors import NotBranchError
import errors


class ImportProcessor(object):
    """Base class for import processors.

    Subclasses should override the pre_*, post_* and *_handler
    methods as appropriate.
    """

    # Polled by _process() after every command; setting it to a true value
    # on the instance stops processing early.  Nothing in this base class
    # sets it, so the class-level default keeps _process() from failing
    # with AttributeError for subclasses that never assign it.
    finished = False

    # XXX this is useless now that we process multiple input streams:
    # we only want to call setup() and teardown() once for all of them!
    def process(self, command_iter):
        """Process the stream of commands.

        Deliberately disabled: always raises RuntimeError.  It used to wrap
        _process() in a setup()/teardown() pair; callers now invoke
        setup(), _process() and teardown() themselves.

        :param command_iter: an iterator providing commands
        """
        raise RuntimeError("hey! who's calling this?!?")

    def _process(self, command_iter):
        """Dispatch every command to its ``<name>_handler`` method.

        :param command_iter: a callable returning an iterator of command
          objects, each with a ``name`` attribute
        :raises errors.MissingHandler: if the concrete class does not
          itself define a handler for a command
        """
        self.pre_process()
        for cmd in command_iter():
            try:
                # Looked up in the concrete class's own __dict__ only:
                # handlers inherited from a base class (including the
                # NotImplementedError stubs below) are not found and
                # surface as MissingHandler instead.
                handler = self.__class__.__dict__[cmd.name + "_handler"]
            except KeyError:
                raise errors.MissingHandler(cmd.name)
            else:
                self.pre_handler(cmd)
                # handler is a plain function taken from the class dict,
                # so the instance is passed explicitly
                handler(self, cmd)
                self.post_handler(cmd)
            if self.finished:
                break
        self.post_process()

    def setup(self):
        """Hook called by the driver before any stream is processed."""
        pass

    def teardown(self):
        """Hook called by the driver after all streams are processed."""
        pass

    def pre_process(self):
        """Hook for logic at start of processing."""
        pass

    def post_process(self):
        """Hook for logic at end of processing."""
        pass

    def pre_handler(self, cmd):
        """Hook for logic before each handler starts."""
        pass

    def post_handler(self, cmd):
        """Hook for logic after each handler finishes."""
        pass

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        raise NotImplementedError(self.progress_handler)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        raise NotImplementedError(self.blob_handler)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        raise NotImplementedError(self.checkpoint_handler)

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        raise NotImplementedError(self.commit_handler)

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        raise NotImplementedError(self.reset_handler)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        raise NotImplementedError(self.tag_handler)


class CommitHandler(object):
    """Base class for commit handling.

    Subclasses should override the pre_*, post_* and *_handler
    methods as appropriate.
    """

    def __init__(self, command):
        self.command = command

    def process(self):
        """Dispatch each file command of the commit to its handler.

        :raises errors.MissingHandler: if the concrete class does not
          itself define a handler for a file command
        """
        self.pre_process_files()
        for fc in self.command.file_iter():
            try:
                # File command names carry a 'file' prefix ('filemodify',
                # 'filedelete', ...); drop it to get the handler name.
                # As in ImportProcessor._process, only handlers defined
                # directly on the concrete class are found.
                handler = self.__class__.__dict__[fc.name[4:] + "_handler"]
            except KeyError:
                raise errors.MissingHandler(fc.name)
            else:
                handler(self, fc)
        self.post_process_files()

    def pre_process_files(self):
        """Prepare for committing."""
        pass

    def post_process_files(self):
        """Save the revision."""
        pass

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        raise NotImplementedError(self.modify_handler)

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        raise NotImplementedError(self.delete_handler)

    def copy_handler(self, filecmd):
        """Handle a filecopy command."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Handle a filerename command."""
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        raise NotImplementedError(self.deleteall_handler)
