changeset 0:d107c6d36780

Add the start of the hg fastimport command
author Paul Crowley <paul@lshift.net>
date Fri, 29 Feb 2008 12:19:18 +0000
parents
children 9461f5c3a67c
files fastimport/__init__.py fastimport/commands.py fastimport/dates.py fastimport/errors.py fastimport/hgechoprocessor.py fastimport/parser.py fastimport/processor.py
diffstat 7 files changed, 1232 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastimport/__init__.py	Fri Feb 29 12:19:18 2008 +0000
@@ -0,0 +1,23 @@
+from mercurial import commands
+
+import parser
+#import dates
+#import commands
+#from fastimport.hgechoprocessor import HgEchoProcessor
+import hgechoprocessor
+#import hhhh
+#from hhhh import HgEchoProcessor
+
+def fastimport(ui, repo, source, **opts):
+    """Run the ``hg fastimport`` command on a fast-import stream file.
+
+    :param ui: the Mercurial ui object; used here to write output
+    :param repo: the repository object, passed through to the processor
+    :param source: path of the fast-import stream to read
+    :param opts: command options, passed through to the processor
+    """
+    ui.write("Source is %s\n" % source)
+    f = open(source)
+    try:
+        proc = hgechoprocessor.HgEchoProcessor(ui, repo, **opts)
+        p = parser.ImportParser(f)
+        proc.process(p.iter_commands)
+    finally:
+        # Release the file handle even if parsing or processing fails
+        f.close()
+
+# Command table: command name -> (function, option list, synopsis)
+cmdtable = {
+    "fastimport": (fastimport, [], 'hg fastimport SOURCE'),
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastimport/commands.py	Fri Feb 29 12:19:18 2008 +0000
@@ -0,0 +1,188 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Import command classes."""
+
+
+# Names of the top-level commands
+COMMAND_NAMES = [
+    'blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag',
+    ]
+# Names of the file commands
+FILE_COMMAND_NAMES = [
+    'filemodify', 'filedelete', 'filecopy', 'filerename', 'filedeleteall',
+    ]
+
+# Kinds of file used by filemodify commands
+FILE_KIND = 'file'
+SYMLINK_KIND = 'symlink'
+
+
+class ImportCommand(object):
+    """Common ancestor of every import command object."""
+
+    def __init__(self, name):
+        self.name = name
+        # Fields whose value is replaced by a placeholder when dumped
+        self._binary = []
+
+    def dump_str(self, names=None, child_lists=None, verbose=False):
+        """Render the fields of this command as a string.
+
+        :param names: the fields to include, in order; None selects every
+            attribute whose name does not start with an underscore
+        :param child_lists: dictionary of child command names to
+            fields for that child command to include (not used here)
+        :param verbose: if True, return the class name followed by a
+            dictionary of the fields; if False, return just the field
+            values separated by tabs
+        """
+        if names is not None:
+            fields = names
+        else:
+            fields = [k for k in self.__dict__.keys() if not k.startswith('_')]
+        interesting = {}
+        for field in fields:
+            value = self.__dict__.get(field)
+            # Binary fields that are set are shown as a placeholder
+            if value is not None and field in self._binary:
+                value = '(...)'
+            interesting[field] = value
+        if not verbose:
+            return "\t".join([str(interesting[k]) for k in fields])
+        return "%s: %s" % (self.__class__.__name__, interesting)
+
+
+class BlobCommand(ImportCommand):
+    """A 'blob' command: some data with an optional mark."""
+
+    def __init__(self, mark, data, lineno=0):
+        super(BlobCommand, self).__init__('blob')
+        self.mark = mark
+        self.data = data
+        self.lineno = lineno
+        # Use the line number as a unique id when there is no mark
+        self.id = ('@%d' % lineno) if mark is None else (':' + mark)
+        # The data is shown as a placeholder by dump_str
+        self._binary = ['data']
+
+
+class CheckpointCommand(ImportCommand):
+    """A 'checkpoint' command; it carries no fields besides its name."""
+
+    def __init__(self):
+        super(CheckpointCommand, self).__init__('checkpoint')
+
+
+class CommitCommand(ImportCommand):
+    """A 'commit' command plus a callable producing its file commands."""
+
+    def __init__(self, ref, mark, author, committer, message, parents,
+        file_iter, lineno=0):
+        super(CommitCommand, self).__init__('commit')
+        self.ref = ref
+        self.mark = mark
+        self.author = author
+        self.committer = committer
+        self.message = message
+        self.parents = parents
+        self.file_iter = file_iter
+        self.lineno = lineno
+        # file_iter is shown as a placeholder by dump_str
+        self._binary = ['file_iter']
+        # Use the line number as a unique id when there is no mark
+        if mark is not None:
+            self.id = ':' + mark
+        else:
+            self.id = '@%d' % lineno
+
+    def dump_str(self, names=None, child_lists=None, verbose=False):
+        """Dump the commit followed by one indented line per file command.
+
+        Every file command returned by file_iter() is visited, but one is
+        only included when child_lists has an entry for its name.
+        """
+        lines = [ImportCommand.dump_str(self, names, verbose=verbose)]
+        for file_cmd in self.file_iter():
+            if child_lists is not None:
+                try:
+                    wanted = child_lists[file_cmd.name]
+                except KeyError:
+                    pass
+                else:
+                    lines.append(
+                        "\t%s" % file_cmd.dump_str(wanted, verbose=verbose))
+        return '\n'.join(lines)
+
+
+class ProgressCommand(ImportCommand):
+    """A 'progress' command carrying a message."""
+
+    def __init__(self, message):
+        super(ProgressCommand, self).__init__('progress')
+        self.message = message
+
+
+class ResetCommand(ImportCommand):
+    """A 'reset' command: a ref and the optional 'from' it was given."""
+
+    def __init__(self, ref, from_):
+        super(ResetCommand, self).__init__('reset')
+        self.ref = ref
+        self.from_ = from_
+
+
+class TagCommand(ImportCommand):
+    """A 'tag' command: tag id, 'from' reference, tagger and message."""
+
+    def __init__(self, id, from_, tagger, message):
+        super(TagCommand, self).__init__('tag')
+        self.id = id
+        self.from_ = from_
+        self.tagger = tagger
+        self.message = message
+
+
+class FileCommand(ImportCommand):
+    """Common ancestor of the file commands."""
+
+
+class FileModifyCommand(FileCommand):
+    """A 'filemodify' command.
+
+    The content is given either by reference (dataref) or inline (data);
+    one of the two should be None.
+    """
+
+    def __init__(self, path, kind, is_executable, dataref, data):
+        super(FileModifyCommand, self).__init__('filemodify')
+        self.path = path
+        self.kind = kind
+        self.is_executable = is_executable
+        self.dataref = dataref
+        self.data = data
+        # Inline data is shown as a placeholder by dump_str
+        self._binary = ['data']
+
+
+class FileDeleteCommand(FileCommand):
+    """A 'filedelete' command for a single path."""
+
+    def __init__(self, path):
+        super(FileDeleteCommand, self).__init__('filedelete')
+        self.path = path
+
+
+class FileCopyCommand(FileCommand):
+    """A 'filecopy' command from a source path to a destination path."""
+
+    def __init__(self, src_path, dest_path):
+        super(FileCopyCommand, self).__init__('filecopy')
+        self.src_path = src_path
+        self.dest_path = dest_path
+
+
+class FileRenameCommand(FileCommand):
+    """A 'filerename' command from an old path to a new path."""
+
+    def __init__(self, old_path, new_path):
+        super(FileRenameCommand, self).__init__('filerename')
+        self.old_path = old_path
+        self.new_path = new_path
+
+
+class FileDeleteAllCommand(FileCommand):
+    """A 'filedeleteall' command; it carries no fields besides its name."""
+
+    def __init__(self):
+        super(FileDeleteAllCommand, self).__init__('filedeleteall')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastimport/dates.py	Fri Feb 29 12:19:18 2008 +0000
@@ -0,0 +1,76 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Date parsing routines.
+
+Each routine returns timestamp,timezone where
+
+* timestamp is seconds since epoch
+* timezone is the offset from UTC in seconds.
+"""
+
+
+import time
+
+
+def parse_raw(s):
+    """Convert a raw date string into (timestamp, timezone).
+
+    The format must be exactly "seconds-since-epoch offset-utc"; the
+    string is split at its first space.
+    See the spec for details.
+    """
+    pieces = s.split(' ', 1)
+    timestamp_str, timezone_str = pieces
+    return float(timestamp_str), _parse_tz(timezone_str)
+
+
+def _parse_tz(tz):
+    """Parse a timezone specification in the [+|-]HHMM format.
+
+    :param tz: the timezone string, e.g. '+0100' or '-0530'
+    :return: the timezone offset in seconds.
+    :raises ValueError: if tz is not a sign followed by four digits
+    """
+    # from git_repository.py in bzr-git
+    # Validate explicitly: an assert would be skipped when running with -O
+    if len(tz) != 5 or tz[0] not in '+-' or not tz[1:].isdigit():
+        raise ValueError("invalid timezone %r" % (tz,))
+    if tz[0] == '-':
+        sign = -1
+    else:
+        sign = 1
+    hours = int(tz[1:3])
+    minutes = int(tz[3:])
+    return sign * 60 * (60 * hours + minutes)
+
+
+def parse_rfc2822(s):
+    """Parse a date from a rfc2822 string.
+
+    See the spec for details.
+
+    :param s: an RFC 2822 date, e.g. 'Fri, 29 Feb 2008 12:19:18 +0000'
+    :return: (timestamp, timezone): seconds since the epoch as a float
+        and the offset from UTC in seconds
+    :raises ValueError: if s cannot be parsed as an RFC 2822 date
+    """
+    # Imported here because this module only imports time at the top level
+    import calendar
+    from email.utils import parsedate_tz
+    parsed = parsedate_tz(s)
+    if parsed is None:
+        raise ValueError("invalid RFC 2822 date %r" % (s,))
+    # The offset may be reported as None; treat that as UTC
+    timezone = parsed[9] or 0
+    # The parsed fields are expressed in the given zone: read them as UTC
+    # and take the offset off to get seconds since the epoch
+    timestamp = float(calendar.timegm(parsed[:9]) - timezone)
+    return timestamp, timezone
+
+
+def parse_now(s):
+    """Return the current time as the date; the argument is not examined.
+
+    The format must be exactly "now".
+    See the spec for details.
+    """
+    current = time.time()
+    return current, 0
+
+
+# Lookup table of date parsing routines
+DATE_PARSERS_BY_NAME = {
+    'raw':      parse_raw,
+    'rfc2822':  parse_rfc2822,
+    'now':      parse_now,
+    }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastimport/errors.py	Fri Feb 29 12:19:18 2008 +0000
@@ -0,0 +1,165 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Exception classes for fastimport"""
+
+#from bzrlib import errors as bzr_errors
+
+
+# Prefix to messages to show location information
+_LOCATION_FMT = "line %(lineno)d: "
+
+
+class ImportError(Exception):
+    """The base exception class for all import processing exceptions.
+
+    Subclasses set _fmt to a %-style template whose named fields are
+    filled in from the attributes of the exception instance.
+    """
+
+    _fmt = "Unknown Import Error"
+
+    def __str__(self):
+        """Return _fmt filled in from the instance attributes."""
+        try:
+            return self._fmt % self.__dict__
+        except (KeyError, TypeError, ValueError):
+            # A missing or mistyped attribute must not make the error
+            # unprintable: fall back to the raw template
+            return self._fmt
+
+
+class ParsingError(ImportError):
+    """The base exception class for errors that carry a line number."""
+
+    _fmt = _LOCATION_FMT + "Unknown Import Parsing Error"
+
+    def __init__(self, lineno):
+        super(ParsingError, self).__init__()
+        self.lineno = lineno
+
+
+class MissingBytes(ParsingError):
+    """Raised when EOF is reached before the expected number of bytes."""
+
+    _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes,"
+        " found %(found)d")
+
+    def __init__(self, lineno, expected, found):
+        super(MissingBytes, self).__init__(lineno)
+        self.expected = expected
+        self.found = found
+
+
+class MissingTerminator(ParsingError):
+    """Raised when EOF is reached before an expected terminator."""
+
+    _fmt = (_LOCATION_FMT +
+        "Unexpected EOF - expected '%(terminator)s' terminator")
+
+    def __init__(self, lineno, terminator):
+        super(MissingTerminator, self).__init__(lineno)
+        self.terminator = terminator
+
+
+class InvalidCommand(ParsingError):
+    """Raised when an unknown command is found."""
+
+    _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'")
+
+    def __init__(self, lineno, cmd):
+        super(InvalidCommand, self).__init__(lineno)
+        self.cmd = cmd
+
+
+class MissingSection(ParsingError):
+    """Raised when a command lacks a section that it requires."""
+
+    _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s")
+
+    def __init__(self, lineno, cmd, section):
+        super(MissingSection, self).__init__(lineno)
+        self.cmd = cmd
+        self.section = section
+
+
+class BadFormat(ParsingError):
+    """Raised when a section of a command is formatted incorrectly."""
+
+    _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in "
+        "command %(cmd)s: found '%(text)s'")
+
+    def __init__(self, lineno, cmd, section, text):
+        super(BadFormat, self).__init__(lineno)
+        self.cmd = cmd
+        self.section = section
+        self.text = text
+
+
+class InvalidTimezone(ParsingError):
+    """Raised when converting a string timezone to a seconds offset."""
+
+    _fmt = (_LOCATION_FMT +
+        "Timezone %(timezone)r could not be converted.%(reason)s")
+
+    def __init__(self, lineno, timezone, reason=None):
+        super(InvalidTimezone, self).__init__(lineno)
+        self.timezone = timezone
+        # The reason follows the final full stop of the message, so it is
+        # stored with a leading space, or as '' when there is none
+        self.reason = (' ' + reason) if reason else ''
+
+
+class UnknownDateFormat(ImportError):
+    """Raised when a date format that is not known is given."""
+
+    _fmt = ("Unknown date format '%(format)s'")
+
+    def __init__(self, format):
+        super(UnknownDateFormat, self).__init__()
+        self.format = format
+
+
+class MissingHandler(ImportError):
+    """Raised when a processor has no handler for a command."""
+
+    _fmt = ("Missing handler for command %(cmd)s")
+
+    def __init__(self, cmd):
+        super(MissingHandler, self).__init__()
+        self.cmd = cmd
+
+
+class UnknownParameter(ImportError):
+    """Raised when a processor is passed a parameter it does not know."""
+
+    _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")
+
+    def __init__(self, param, knowns):
+        super(UnknownParameter, self).__init__()
+        self.param = param
+        self.knowns = knowns
+
+
+class BadRepositorySize(ImportError):
+    """Raised when the repository has an incorrect number of revisions."""
+
+    _fmt = ("Bad repository size - %(found)d revisions found, "
+        "%(expected)d expected")
+
+    def __init__(self, expected, found):
+        super(BadRepositorySize, self).__init__()
+        self.expected = expected
+        self.found = found
+
+
+class BadRestart(ImportError):
+    """Raised when the import stream and id-map do not match up."""
+
+    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
+        "but matching revision-id is unknown")
+
+    def __init__(self, commit_id):
+        super(BadRestart, self).__init__()
+        self.commit_id = commit_id
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastimport/hgechoprocessor.py	Fri Feb 29 12:19:18 2008 +0000
@@ -0,0 +1,115 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Processor of import commands.
+
+This module provides core processing functionality including an abstract class
+for basing real processors on. See the processors package for examples.
+"""
+
+
+import processor
+
+class HgEchoProcessor(processor.ImportProcessor):
+    """Import processor that writes each command it is given to the ui."""
+    
+    def __init__(self, ui, repo, **opts):
+        """Store the ui, the repository and the options for later use."""
+        self.ui = ui
+        self.repo = repo
+        self.opts = opts
+        # NOTE(review): this instance attribute has the same name as the
+        # finished() method defined below, so on an instance "finished" is
+        # this flag and the method cannot be called through the instance.
+        # How the base class uses the name is not visible here - confirm.
+        self.finished = False
+        
+    def progress_handler(self, cmd):
+        """Process a ProgressCommand."""
+        self.ui.write("Cmd: %s\n" % repr(cmd))
+
+    def blob_handler(self, cmd):
+        """Process a BlobCommand."""
+        self.ui.write("Cmd: %s\n" % repr(cmd))
+
+    def checkpoint_handler(self, cmd):
+        """Process a CheckpointCommand."""
+        self.ui.write("Cmd: %s\n" % repr(cmd))
+
+    def commit_handler(self, cmd):
+        """Process a CommitCommand.
+
+        The file commands of the commit are handled by a separate
+        HgEchoCommitHandler, between a "Commit" and a "Done commit" line.
+        """
+        self.ui.write("Commit: %s\n" % repr(cmd))
+        commit_handler = HgEchoCommitHandler(cmd, self.ui, self.repo, **self.opts)
+        commit_handler.process()
+        self.ui.write("Done commit\n")
+
+    def reset_handler(self, cmd):
+        """Process a ResetCommand."""
+        self.ui.write("Cmd: %s\n" % repr(cmd))
+
+    def tag_handler(self, cmd):
+        """Process a TagCommand."""
+        self.ui.write("Cmd: %s\n" % repr(cmd))
+
+    def finished(self):
+        """Write "Finished" to the ui (shadowed by the flag set in __init__)."""
+        self.ui.write("Finished")
+
+    def pre_handler(self, cmd):
+        """Write the command to the ui with a "Pre-handler" prefix."""
+        self.ui.write("Pre-handler: %s\n" % repr(cmd))
+
+    def post_handler(self, cmd):
+        """Write the command to the ui with a "Post-handler" prefix."""
+        self.ui.write("Post-handler: %s\n" % repr(cmd))
+
+class HgEchoCommitHandler(processor.CommitHandler):
+    """Commit handler that writes each file command of a commit to the ui."""
+
+    def __init__(self, command, ui, repo, **opts):
+        """Store the commit command, ui, repository and options."""
+        self.command = command
+        self.ui = ui
+        self.repo = repo
+        self.opts = opts
+
+    def process(self):
+        """Pass every file command of the commit to its handler.
+
+        The handler for a file command is the method defined directly on
+        this class whose name is the command name without its first four
+        characters, followed by "_handler".
+
+        :raises errors.MissingHandler: if this class defines no such method
+        """
+        self.pre_process_files()
+        for fc in self.command.file_iter():
+            try:
+                # e.g. 'filemodify' -> 'modify_handler'
+                handler = self.__class__.__dict__[fc.name[4:] + "_handler"]
+            except KeyError:
+                # Imported here because this module does not import errors
+                # at the top level
+                import errors
+                raise errors.MissingHandler(fc.name)
+            else:
+                handler(self, fc)
+        self.post_process_files()
+
+    def pre_process_files(self):
+        """Prepare for committing."""
+        pass
+
+    def post_process_files(self):
+        """Save the revision."""
+        pass
+
+    def modify_handler(self, filecmd):
+        """Handle a filemodify command."""
+        self.ui.write("Cmd: %s\n" % repr(filecmd))
+
+    def delete_handler(self, filecmd):
+        """Handle a filedelete command."""
+        self.ui.write("Cmd: %s\n" % repr(filecmd))
+
+    def copy_handler(self, filecmd):
+        """Handle a filecopy command."""
+        self.ui.write("Cmd: %s\n" % repr(filecmd))
+
+    def rename_handler(self, filecmd):
+        """Handle a filerename command."""
+        self.ui.write("Cmd: %s\n" % repr(filecmd))
+
+    def deleteall_handler(self, filecmd):
+        """Handle a filedeleteall command."""
+        self.ui.write("Cmd: %s\n" % repr(filecmd))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastimport/parser.py	Fri Feb 29 12:19:18 2008 +0000
@@ -0,0 +1,507 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Parser of import data into command objects.
+
+In order to reuse existing front-ends, the stream format is a subset of
+the one used by git-fast-import (as of the 1.5.4 release of git at least).
+The grammar is:
+
+  stream ::= cmd*;
+
+  cmd ::= new_blob
+        | new_commit
+        | new_tag
+        | reset_branch
+        | checkpoint
+        | progress
+        ;
+
+  new_blob ::= 'blob' lf
+    mark?
+    file_content;
+  file_content ::= data;
+
+  new_commit ::= 'commit' sp ref_str lf
+    mark?
+    ('author' sp name '<' email '>' when lf)?
+    'committer' sp name '<' email '>' when lf
+    commit_msg
+    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
+    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
+    file_change*
+    lf?;
+  commit_msg ::= data;
+
+  file_change ::= file_clr
+    | file_del
+    | file_rnm
+    | file_cpy
+    | file_obm
+    | file_inm;
+  file_clr ::= 'deleteall' lf;
+  file_del ::= 'D' sp path_str lf;
+  file_rnm ::= 'R' sp path_str sp path_str lf;
+  file_cpy ::= 'C' sp path_str sp path_str lf;
+  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
+  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
+    data;
+
+  new_tag ::= 'tag' sp tag_str lf
+    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
+    'tagger' sp name '<' email '>' when lf
+    tag_msg;
+  tag_msg ::= data;
+
+  reset_branch ::= 'reset' sp ref_str lf
+    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
+    lf?;
+
+  checkpoint ::= 'checkpoint' lf
+    lf?;
+
+  progress ::= 'progress' sp not_lf* lf
+    lf?;
+
+     # note: the first idnum in a stream should be 1 and subsequent
+     # idnums should not have gaps between values as this will cause
+     # the stream parser to reserve space for the gapped values.  An
+     # idnum can be updated in the future to a new object by issuing
+     # a new mark directive with the old idnum.
+     #
+  mark ::= 'mark' sp idnum lf;
+  data ::= (delimited_data | exact_data)
+    lf?;
+
+    # note: delim may be any string but must not contain lf.
+    # data_line may contain any data but must not be exactly
+    # delim.
+  delimited_data ::= 'data' sp '<<' delim lf
+    (data_line lf)*
+    delim lf;
+
+     # note: declen indicates the length of binary_data in bytes.
+     # declen does not include the lf preceding the binary data.
+     #
+  exact_data ::= 'data' sp declen lf
+    binary_data;
+
+     # note: quoted strings are C-style quoting supporting \c for
+     # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
+     # is the signed byte value in octal.  Note that the only
+     # characters which must actually be escaped to protect the
+     # stream formatting is: \, " and LF.  Otherwise these values
+     # are UTF8.
+     #
+  ref_str     ::= ref;
+  sha1exp_str ::= sha1exp;
+  tag_str     ::= tag;
+  path_str    ::= path    | '"' quoted(path)    '"' ;
+  mode        ::= '100644' | '644'
+                | '100755' | '755'
+                | '120000'
+                ;
+
+  declen ::= # unsigned 32 bit value, ascii base10 notation;
+  bigint ::= # unsigned integer value, ascii base10 notation;
+  binary_data ::= # file content, not interpreted;
+
+  when         ::= raw_when | rfc2822_when;
+  raw_when     ::= ts sp tz;
+  rfc2822_when ::= # Valid RFC 2822 date and time;
+
+  sp ::= # ASCII space character;
+  lf ::= # ASCII newline (LF) character;
+
+     # note: a colon (':') must precede the numerical value assigned to
+     # an idnum.  This is to distinguish it from a ref or tag name as
+     # GIT does not permit ':' in ref or tag strings.
+     #
+  idnum   ::= ':' bigint;
+  path    ::= # GIT style file path, e.g. "a/b/c";
+  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
+  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
+  sha1exp ::= # Any valid GIT SHA1 expression;
+  hexsha1 ::= # SHA1 in hexadecimal format;
+
+     # note: name and email are UTF8 strings, however name must not
+     # contain '<' or lf and email must not contain any of the
+     # following: '<', '>', lf.
+     #
+  name  ::= # valid GIT author/committer name;
+  email ::= # valid GIT author/committer email;
+  ts    ::= # time since the epoch in seconds, ascii base10 notation;
+  tz    ::= # GIT style timezone;
+
+     # note: comments may appear anywhere in the input, except
+     # within a data command.  Any form of the data command
+     # always escapes the related input from comment processing.
+     #
+     # In case it is not clear, the '#' that starts the comment
+     # must be the first character on that line (an lf must have
+     # preceded it).
+     #
+  comment ::= '#' not_lf* lf;
+  not_lf  ::= # Any byte that is not ASCII newline (LF);
+"""
+
+
+import re
+import sys
+
+import commands
+import dates
+import errors
+
+
+## Stream parsing ##
+
+class LineBasedParser(object):
+    """A reader of lines and raw bytes that keeps track of line numbers."""
+
+    def __init__(self, input):
+        """A Parser that keeps track of line numbers.
+
+        :param input: the file-like object to read from
+        """
+        self.input = input
+        self.lineno = 0
+        # Lines pushed back onto the input stream
+        self._buffer = []
+
+    def abort(self, exception, *args):
+        """Raise an exception providing line number information.
+
+        :param exception: the exception class; it is called with the
+            current line number followed by args
+        """
+        raise exception(self.lineno, *args)
+
+    def readline(self):
+        """Get the next line including the newline or '' on EOF.
+
+        Lines that were pushed back are returned before the input is read.
+        """
+        self.lineno += 1
+        if self._buffer:
+            return self._buffer.pop()
+        else:
+            return self.input.readline()
+
+    def next_line(self):
+        """Get the next line without the newline or None on EOF."""
+        line = self.readline()
+        if not line:
+            return None
+        # Only strip a newline that is really there: the last line of the
+        # input may not end with one
+        if line.endswith('\n'):
+            return line[:-1]
+        return line
+
+    def push_line(self, line):
+        """Push line back onto the line buffer.
+
+        :param line: the line with no trailing newline
+        """
+        self.lineno -= 1
+        self._buffer.append(line + "\n")
+
+    def read_bytes(self, count):
+        """Read a given number of bytes from the input stream.
+
+        Throws MissingBytes if the bytes are not found.
+
+        Note: This method does not read from the line buffer.
+
+        :return: a string
+        """
+        lines = []
+        left = count
+        found = 0
+        while left > 0:
+            # Read line by line so that the line number stays accurate
+            line = self.input.readline(left)
+            if line:
+                line_len = len(line)
+                left -= line_len
+                found += line_len
+                lines.append(line)
+                if line.endswith('\n'):
+                    self.lineno += 1
+            else:
+                # EOF: stop and report the shortfall below
+                left = 0
+        if found != count:
+            self.abort(errors.MissingBytes, count, found)
+        return ''.join(lines)
+
+    def read_until(self, terminator):
+        """Read the input stream until the terminator is found.
+
+        The terminator must be on a line of its own.
+
+        Throws MissingTerminator if the terminator is not found.
+
+        Note: This method does not read from the line buffer.
+
+        :return: the bytes read up to but excluding the terminator.
+        """
+        lines = []
+        terminator_line = terminator + '\n'
+        while True:
+            line = self.input.readline()
+            if not line:
+                # EOF before the terminator was seen
+                self.abort(errors.MissingTerminator, terminator)
+            if line.endswith('\n'):
+                self.lineno += 1
+            # The terminator may be the last line and lack a newline
+            if line == terminator_line or line == terminator:
+                break
+            lines.append(line)
+        return ''.join(lines)
+
+
+# Regular expression used for parsing. (Note: The spec states that the name
+# part should be non-empty but git-fast-export doesn't always do that so
+# the first bit is \w*, not \w+.)
+_WHO_AND_WHEN_RE = re.compile(r'(\w*) <(.+)> (.+)')
+
+
+class ImportParser(LineBasedParser):
+
+    def __init__(self, input, verbose=False, output=sys.stdout):
+        """Create a parser of import commands.
+
+        :param input: the file-like object to read from
+        :param verbose: display extra information or not
+        :param output: the file-like object to write messages to (YAGNI?)
+        """
+        super(ImportParser, self).__init__(input)
+        self.verbose = verbose
+        self.output = output
+        # The date format is auto-detected when a date is first
+        # encountered, so there is no date parser yet
+        self.date_parser = None
+        self.last_mark = None
+
+    def iter_commands(self):
+        """Yield an ImportCommand for each command in the input.
+
+        Empty lines and lines starting with '#' are skipped.  A line that
+        starts none of the known commands aborts with InvalidCommand.
+        """
+        while True:
+            line = self.next_line()
+            if line is None:
+                # End of input
+                return
+            if not line or line.startswith('#'):
+                continue
+            # Commands are tried in order of likelihood
+            if line.startswith('commit '):
+                yield self._parse_commit(line[len('commit '):])
+            elif line.startswith('blob'):
+                yield self._parse_blob()
+            elif line.startswith('progress '):
+                yield commands.ProgressCommand(line[len('progress '):])
+            elif line.startswith('reset '):
+                yield self._parse_reset(line[len('reset '):])
+            elif line.startswith('tag '):
+                yield self._parse_tag(line[len('tag '):])
+            elif line.startswith('checkpoint'):
+                yield commands.CheckpointCommand()
+            else:
+                self.abort(errors.InvalidCommand, line)
+
+    def iter_file_commands(self):
+        """Iterator returning FileCommand objects.
+
+        Empty lines and lines starting with '#' are skipped.
+
+        If an invalid file command is found, the line is silently
+        pushed back and iteration ends.
+        """
+        while True:
+            line = self.next_line()
+            if line is None:
+                break
+            elif len(line) == 0 or line.startswith('#'):
+                continue
+            # Search for file commands in order of likelihood
+            elif line.startswith('M '):
+                yield self._parse_file_modify(line[2:])
+            elif line.startswith('D '):
+                path = self._path(line[2:])
+                yield commands.FileDeleteCommand(path)
+            elif line.startswith('R '):
+                old, new = self._path_pair(line[2:])
+                yield commands.FileRenameCommand(old, new)
+            elif line.startswith('C '):
+                # 'C' is a copy: the source path stays in place
+                src, dest = self._path_pair(line[2:])
+                yield commands.FileCopyCommand(src, dest)
+            elif line.startswith('deleteall'):
+                yield commands.FileDeleteAllCommand()
+            else:
+                # Not a file command: leave it for the command iterator
+                self.push_line(line)
+                break
+
+    def _parse_blob(self):
+        """Parse the rest of a blob command: an optional mark, then data."""
+        start_line = self.lineno
+        blob_mark = self._get_mark_if_any()
+        content = self._get_data('blob')
+        return commands.BlobCommand(blob_mark, content, start_line)
+
+    def _parse_commit(self, ref):
+        """Parse a commit command.
+
+        Reads, in order, the optional mark, the optional author, the
+        committer, the message, the optional 'from' and any 'merge' lines.
+        The file commands are not read here: the returned command is given
+        self.iter_file_commands so that they can be read from the stream
+        later.
+
+        :param ref: the text following 'commit ' on the command line
+        :return: a commands.CommitCommand
+        """
+        lineno  = self.lineno
+        mark = self._get_mark_if_any()
+        author = self._get_user_info('commit', 'author', False)
+        committer = self._get_user_info('commit', 'committer')
+        message = self._get_data('commit', 'message')
+        from_ = self._get_from()
+        # Without an explicit 'from', fall back to the mark of the
+        # previously parsed commit.
+        # NOTE(review): last_mark is stored without the ':' that follows
+        # 'mark ', while the text after 'from ' is kept as it is - confirm
+        # that users of parents accept both forms.
+        if from_ is None:
+            from_ = self.last_mark
+        self.last_mark = mark
+        if from_ is not None:
+            parents = [from_]
+            # Every consecutive 'merge' line adds one more parent
+            while True:
+                merge = self._get_merge()
+                if merge is not None:
+                    parents.append(merge)
+                else:
+                    break
+        else:
+            # NOTE(review): 'merge' lines are only looked for when a
+            # parent was found above
+            parents = []
+        return commands.CommitCommand(ref, mark, author, committer, message,
+            parents, self.iter_file_commands, lineno)
+
+    def _parse_file_modify(self, info):
+        """Build a FileModifyCommand from the text after 'M '.
+
+        :param info: a string in the format "mode dataref path"
+          (where dataref might be the hard-coded literal 'inline').
+        """
+        params = info.split(' ', 2)
+        path = self._path(params[2])
+        is_executable, is_symlink = self._mode(params[0])
+        kind = commands.SYMLINK_KIND if is_symlink else commands.FILE_KIND
+        dataref = params[1]
+        data = None
+        if dataref == 'inline':
+            # The content follows the command instead of being referenced
+            dataref = None
+            data = self._get_data('filemodify')
+        return commands.FileModifyCommand(
+            path, kind, is_executable, dataref, data)
+
+    def _parse_reset(self, ref):
+        """Parse the rest of a reset command: an optional 'from' line."""
+        reset_from = self._get_from()
+        reset = commands.ResetCommand(ref, reset_from)
+        return reset
+
+    def _parse_tag(self, name):
+        """Parse the rest of a tag command: 'from', tagger and message."""
+        tag_from = self._get_from('tag')
+        who = self._get_user_info('tag', 'tagger')
+        text = self._get_data('tag', 'message')
+        return commands.TagCommand(name, tag_from, who, text)
+
+    def _get_mark_if_any(self):
+        """Parse a mark section.
+
+        :return: the text after 'mark :', or None if the next line is not
+            a mark (the line is then pushed back) or the input has ended
+        """
+        line = self.next_line()
+        if line is None:
+            # EOF: there is nothing to examine or to push back
+            return None
+        if line.startswith('mark :'):
+            return line[len('mark :'):]
+        else:
+            self.push_line(line)
+            return None
+
+    def _get_from(self, required_for=None):
+        """Read a from section.
+
+        :param required_for: name of the command that requires the
+          section; when given and the section is absent, abort is called
+          with errors.MissingSection
+        :return: the text following 'from ', or None when the section is
+          optional and absent (the line read is then pushed back)
+        """
+        prefix = 'from '
+        candidate = self.next_line()
+        if candidate.startswith(prefix):
+            return candidate[len(prefix):]
+        if not required_for:
+            self.push_line(candidate)
+            return None
+        self.abort(errors.MissingSection, required_for, 'from')
+
+    def _get_merge(self):
+        """Read an optional merge section.
+
+        :return: the text following 'merge ' when the next line is a merge
+          section; otherwise the line is pushed back and None is returned
+        """
+        prefix = 'merge '
+        candidate = self.next_line()
+        if not candidate.startswith(prefix):
+            self.push_line(candidate)
+            return None
+        return candidate[len(prefix):]
+
+    def _get_user_info(self, cmd, section, required=True):
+        """Read a user section such as author, committer or tagger.
+
+        :param cmd: name of the command being parsed, used in error reports
+        :param section: the section keyword expected at the start of the
+          line
+        :param required: when true and the section is absent, abort is
+          called with errors.MissingSection
+        :return: the result of _who_when() on the text after the keyword,
+          or None when the section is optional and absent (the line read
+          is then pushed back)
+        """
+        prefix = section + ' '
+        candidate = self.next_line()
+        if candidate.startswith(prefix):
+            return self._who_when(candidate[len(prefix):], cmd, section)
+        if not required:
+            self.push_line(candidate)
+            return None
+        self.abort(errors.MissingSection, cmd, section)
+
+    def _get_data(self, required_for, section='data'):
+        """Parse a data section.
+
+        Two forms are handled: "data <<DELIM", whose delimiter text is
+        handed to read_until(), and "data COUNT", where COUNT bytes are
+        requested from read_bytes().
+
+        :param required_for: name of the command that needs the data,
+          reported if the section is missing
+        :param section: section name reported if the section is missing
+        :return: the data read
+        """
+        line = self.next_line()
+        if line.startswith('data '):
+            rest = line[len('data '):]
+            if rest.startswith('<<'):
+                return self.read_until(rest[2:])
+            else:
+                size = int(rest)
+                res = self.read_bytes(size)
+                # consume extra LF if present
+                line = self.next_line()
+                if line != '':
+                    self.push_line(line)
+                # The counted form previously fell off the end here and
+                # handed None back to the caller instead of the data.
+                return res
+        else:
+            self.abort(errors.MissingSection, required_for, section)
+
+    def _who_when(self, s, cmd, section):
+        """Parse who and when information from a string.
+
+        :param s: the text following the section keyword
+        :param cmd: command name, used when reporting a bad format
+        :param section: section name, used when reporting a bad format
+        :return: a tuple of (name,email,timestamp,timezone): groups 1 and
+          2 of the who-and-when pattern followed by the first two items
+          returned by the date parser
+        """
+        found = _WHO_AND_WHEN_RE.search(s)
+        if not found:
+            self.abort(errors.BadFormat, cmd, section, s)
+            return None
+        date_text = found.group(3)
+        if self.date_parser is None:
+            # No parser chosen yet: guess the format from this first date
+            # and keep using the chosen parser for later dates.
+            if len(date_text.split(' ')) == 2:
+                parser_name = 'raw'
+            elif date_text == 'now':
+                parser_name = 'now'
+            else:
+                parser_name = 'rfc2822'
+            self.date_parser = dates.DATE_PARSERS_BY_NAME[parser_name]
+        parsed = self.date_parser(date_text)
+        return (found.group(1), found.group(2), parsed[0], parsed[1])
+
+    def _path(self, s, cmd='?', section='path'):
+        """Parse a path.
+
+        A path starting with a double quote must also end with one; the
+        quotes are removed and the C-style escapes inside are expanded.
+        Any other path is returned unchanged.
+
+        :param s: the path text as it appears in the stream
+        :param cmd: command name used when reporting a malformed path
+        :param section: section name used when reporting a malformed path
+        :return: the parsed path
+        """
+        if s.startswith('"'):
+            # A lone '"' has no closing quote even though s[-1] is '"'.
+            if len(s) < 2 or s[-1] != '"':
+                # The original referenced undefined names here, turning a
+                # format error into a NameError.
+                self.abort(errors.BadFormat, cmd, section, s)
+            else:
+                return _unquote_c_string(s[1:-1])
+        return s
+
+    def _path_pair(self, s):
+        """Parse two paths separated by a space.
+
+        Either path may be written as a C-style quoted string; a first
+        path that contains a space must be quoted.  Quoted paths have
+        their surrounding quotes removed and their escape sequences
+        expanded, while unquoted paths are returned exactly as written,
+        which is how _path() treats a single path.
+
+        :param s: the text holding both paths
+        :return: a two-element list [first_path, second_path]
+        """
+        # NOTE: abort() is expected to raise; the code after each call
+        # assumes well-formed input.
+        if s.startswith('"'):
+            # Find the closing quote of the first path, stepping over
+            # backslash escapes so that an escaped quote does not end it.
+            end = 1
+            while end < len(s) and s[end] != '"':
+                if s[end] == '\\':
+                    end += 2
+                else:
+                    end += 1
+            if end >= len(s) or s[end + 1:end + 2] != ' ':
+                self.abort(errors.BadFormat, '?', 'path pair', s)
+            first = _unquote_c_string(s[1:end])
+            second = s[end + 2:]
+        else:
+            parts = s.split(' ', 1)
+            if len(parts) != 2:
+                # Only one path was supplied.
+                self.abort(errors.BadFormat, '?', 'path pair', s)
+            first, second = parts
+        if second.startswith('"'):
+            if len(second) < 2 or not second.endswith('"'):
+                self.abort(errors.BadFormat, '?', 'path pair', s)
+            second = _unquote_c_string(second[1:-1])
+        return [first, second]
+
+    def _mode(self, s):
+        """Translate a file mode string into executable and symlink flags.
+
+        :param s: the mode text taken from a filemodify command
+        :return: (is_executable, is_symlink)
+        """
+        # Output from git-fast-export differs slightly from the spec, so
+        # several spellings of each mode are accepted.
+        known_modes = (
+            (('644', '100644', '0100644'), (False, False)),
+            (('755', '100755', '0100755'), (True, False)),
+            (('120000', '0120000'), (False, True)),
+        )
+        for spellings, flags in known_modes:
+            if s in spellings:
+                return flags
+        self.abort(errors.BadFormat, 'filemodify', 'mode', s)
+
+
+def _unquote_c_string(s):
+    """Replace C-style escape sequences (\\n, \\", etc.) with real chars.
+
+    :param s: the escaped text
+    :return: the text with its escape sequences decoded
+    """
+    # HACK: Python strings are close enough
+    # NOTE: str.decode and the 'string_escape' codec exist only on
+    # Python 2.
+    return s.decode('string_escape', 'replace')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastimport/processor.py	Fri Feb 29 12:19:18 2008 +0000
@@ -0,0 +1,158 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Processor of import commands.
+
+This module provides core processing functionality including an abstract class
+for basing real processors on. See the processors package for examples.
+"""
+
+
+#from bzrlib.errors import NotBranchError
+import errors
+
+
+class ImportProcessor(object):
+    """Base class for import processors.
+
+    Subclasses should override the pre_*, post_* and *_handler
+    methods as appropriate.
+    """
+
+    # A handler may set this to a true value on the instance to stop
+    # processing once the current command has been handled.  It is
+    # defined here so that subclasses which never set it do not fail
+    # with an AttributeError in _process().
+    finished = False
+
+    def process(self, command_iter):
+        """Process the stream of commands.
+
+        setup() is called first and teardown() is always called last,
+        even when processing raises.
+
+        :param command_iter: a callable returning an iterator of commands
+        """
+        self.setup()
+        try:
+            self._process(command_iter)
+        finally:
+            self.teardown()
+
+    def _process(self, command_iter):
+        """Dispatch each command to its <name>_handler method.
+
+        Every handler call is wrapped by pre_handler() and post_handler();
+        the loop stops early once self.finished is true.
+
+        :param command_iter: a callable returning an iterator of commands
+        :raises errors.MissingHandler: if no handler is defined for a
+          command
+        """
+        self.pre_process()
+        for cmd in command_iter():
+            handler = self._find_handler(cmd.name)
+            self.pre_handler(cmd)
+            handler(cmd)
+            self.post_handler(cmd)
+            if self.finished:
+                break
+        self.post_process()
+
+    def _find_handler(self, cmd_name):
+        """Return the bound handler for cmd_name or raise MissingHandler.
+
+        The concrete class is searched first, then its ancestors up to
+        but excluding ImportProcessor.  Handlers inherited from an
+        intermediate processor class are therefore honoured, while the
+        NotImplementedError placeholders defined below still count as
+        missing for subclasses.
+        """
+        attr = cmd_name + "_handler"
+        cls = self.__class__
+        for klass in cls.__mro__:
+            if klass is ImportProcessor and klass is not cls:
+                break
+            if attr in klass.__dict__:
+                return getattr(self, attr)
+        raise errors.MissingHandler(cmd_name)
+
+    def setup(self):
+        """Hook called by process() before any command is handled."""
+        pass
+
+    def teardown(self):
+        """Hook called by process() after processing, even on error."""
+        pass
+
+    def pre_process(self):
+        """Hook for logic at start of processing."""
+        pass
+
+    def post_process(self):
+        """Hook for logic at end of processing."""
+        pass
+
+    def pre_handler(self, cmd):
+        """Hook for logic before each handler starts."""
+        pass
+
+    def post_handler(self, cmd):
+        """Hook for logic after each handler finishes."""
+        pass
+
+    def progress_handler(self, cmd):
+        """Process a ProgressCommand."""
+        raise NotImplementedError(self.progress_handler)
+
+    def blob_handler(self, cmd):
+        """Process a BlobCommand."""
+        raise NotImplementedError(self.blob_handler)
+
+    def checkpoint_handler(self, cmd):
+        """Process a CheckpointCommand."""
+        raise NotImplementedError(self.checkpoint_handler)
+
+    def commit_handler(self, cmd):
+        """Process a CommitCommand."""
+        raise NotImplementedError(self.commit_handler)
+
+    def reset_handler(self, cmd):
+        """Process a ResetCommand."""
+        raise NotImplementedError(self.reset_handler)
+
+    def tag_handler(self, cmd):
+        """Process a TagCommand."""
+        raise NotImplementedError(self.tag_handler)
+
+
+class CommitHandler(object):
+    """Base class for commit handling.
+
+    Subclasses should override the pre_*, post_* and *_handler
+    methods as appropriate.
+    """
+
+    def __init__(self, command):
+        """Remember the commit command whose file commands are processed."""
+        self.command = command
+
+    def process(self):
+        """Dispatch each file command of the commit to its handler.
+
+        The handler name is the file command's name with its first four
+        characters removed (presumably the 'file' prefix, e.g.
+        'filemodify' -> modify_handler) plus '_handler'.
+
+        :raises errors.MissingHandler: if no handler is defined for a
+          file command
+        """
+        self.pre_process_files()
+        for fc in self.command.file_iter():
+            handler = self._find_handler(fc.name)
+            handler(fc)
+        self.post_process_files()
+
+    def _find_handler(self, fc_name):
+        """Return the bound handler for fc_name or raise MissingHandler.
+
+        The concrete class is searched first, then its ancestors up to
+        but excluding CommitHandler.  Handlers inherited from an
+        intermediate class are therefore honoured, while the
+        NotImplementedError placeholders defined below still count as
+        missing for subclasses.
+        """
+        attr = fc_name[4:] + "_handler"
+        cls = self.__class__
+        for klass in cls.__mro__:
+            if klass is CommitHandler and klass is not cls:
+                break
+            if attr in klass.__dict__:
+                return getattr(self, attr)
+        raise errors.MissingHandler(fc_name)
+
+    def pre_process_files(self):
+        """Prepare for committing."""
+        pass
+
+    def post_process_files(self):
+        """Save the revision."""
+        pass
+
+    def modify_handler(self, filecmd):
+        """Handle a filemodify command."""
+        raise NotImplementedError(self.modify_handler)
+
+    def delete_handler(self, filecmd):
+        """Handle a filedelete command."""
+        raise NotImplementedError(self.delete_handler)
+
+    def copy_handler(self, filecmd):
+        """Handle a filecopy command."""
+        raise NotImplementedError(self.copy_handler)
+
+    def rename_handler(self, filecmd):
+        """Handle a filerename command."""
+        raise NotImplementedError(self.rename_handler)
+
+    def deleteall_handler(self, filecmd):
+        """Handle a filedeleteall command."""
+        raise NotImplementedError(self.deleteall_handler)