Mercurial > hg > hg-fastimport
changeset 0:d107c6d36780
Add the start of the hg fastimport command
| author | Paul Crowley <paul@lshift.net> |
|---|---|
| date | Fri, 29 Feb 2008 12:19:18 +0000 |
| parents | |
| children | 9461f5c3a67c |
| files | fastimport/__init__.py fastimport/commands.py fastimport/dates.py fastimport/errors.py fastimport/hgechoprocessor.py fastimport/parser.py fastimport/processor.py |
| diffstat | 7 files changed, 1232 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# fastimport/__init__.py
"""Mercurial extension module defining the ``hg fastimport`` command."""

from mercurial import commands

import parser
import hgechoprocessor


def fastimport(ui, repo, source, **opts):
    """Parse the fast-import stream in *source* and run it through the
    echo processor.

    :param ui: object whose ``write`` method receives the output
    :param repo: passed through unchanged to the processor
    :param source: path of the file holding the fast-import stream
    :param opts: extra keyword options, passed through to the processor
    """
    ui.write("Source is %s\n" % source)
    # Binary mode: the parser counts bytes for 'data <n>' sections, so the
    # platform must not translate line endings while reading.
    f = open(source, 'rb')
    try:
        proc = hgechoprocessor.HgEchoProcessor(ui, repo, **opts)
        p = parser.ImportParser(f)
        # The processor calls iter_commands itself, so pass the bound
        # method rather than an already-started iterator.
        proc.process(p.iter_commands)
    finally:
        # Release the file handle even when parsing/processing fails.
        f.close()


# Command table: name -> (function, options, usage synopsis)
cmdtable = {
    "fastimport":
        (fastimport,
         [],
         'hg fastimport SOURCE')
}
# fastimport/commands.py
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Import command classes."""


# Lists of command names
COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag']
FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
    'filedeleteall']

# Bazaar file kinds
FILE_KIND = 'file'
SYMLINK_KIND = 'symlink'


class ImportCommand(object):
    """Base class for import commands."""

    def __init__(self, name):
        self.name = name
        # List of field names not to display
        self._binary = []

    def __repr__(self):
        """Return the verbose field dump of this command.

        The base-class dump is used on purpose: CommitCommand.dump_str()
        calls file_iter(), and building a repr must never do that.
        """
        return ImportCommand.dump_str(self, verbose=True)

    def dump_str(self, names=None, child_lists=None, verbose=False):
        """Dump fields as a string.

        :param names: the list of fields to include or
            None for all public fields
        :param child_lists: dictionary of child command names to
            fields for that child command to include
        :param verbose: if True, prefix each line with the command class and
            display fields as a dictionary; if False, dump just the field
            values with tabs between them
        :return: the formatted string
        """
        interesting = {}
        if names is None:
            # Public fields only: names starting with '_' are internal.
            fields = [k for k in self.__dict__ if not k.startswith('_')]
        else:
            fields = names
        for field in fields:
            value = self.__dict__.get(field)
            # Fields listed in _binary are shown as a placeholder.
            if field in self._binary and value is not None:
                value = '(...)'
            interesting[field] = value
        if verbose:
            return "%s: %s" % (self.__class__.__name__, interesting)
        return "\t".join([str(interesting[k]) for k in fields])


class BlobCommand(ImportCommand):
    """A 'blob' command: a mark (possibly None) plus its data."""

    def __init__(self, mark, data, lineno=0):
        ImportCommand.__init__(self, 'blob')
        self.mark = mark
        self.data = data
        self.lineno = lineno
        # Provide a unique id in case the mark is missing
        if mark is None:
            self.id = '@%d' % lineno
        else:
            self.id = ':' + mark
        self._binary = ['data']


class CheckpointCommand(ImportCommand):
    """A 'checkpoint' command; it carries no fields of its own."""

    def __init__(self):
        ImportCommand.__init__(self, 'checkpoint')


class CommitCommand(ImportCommand):
    """A 'commit' command.

    ``file_iter`` is a callable returning an iterator over the commit's
    file commands.
    """

    def __init__(self, ref, mark, author, committer, message, parents,
        file_iter, lineno=0):
        ImportCommand.__init__(self, 'commit')
        self.ref = ref
        self.mark = mark
        self.author = author
        self.committer = committer
        self.message = message
        self.parents = parents
        self.file_iter = file_iter
        self.lineno = lineno
        self._binary = ['file_iter']
        # Provide a unique id in case the mark is missing
        if mark is None:
            self.id = '@%d' % lineno
        else:
            self.id = ':' + mark

    def dump_str(self, names=None, child_lists=None, verbose=False):
        """Dump this commit followed by one indented line per file command.

        File commands are always iterated; only those whose name is a key
        of ``child_lists`` are included in the output.
        """
        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
        for f in self.file_iter():
            if child_lists is None:
                continue
            try:
                child_names = child_lists[f.name]
            except KeyError:
                continue
            result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
        return '\n'.join(result)


class ProgressCommand(ImportCommand):
    """A 'progress' command carrying a message."""

    def __init__(self, message):
        ImportCommand.__init__(self, 'progress')
        self.message = message


class ResetCommand(ImportCommand):
    """A 'reset' command: a ref and an optional 'from' value."""

    def __init__(self, ref, from_):
        ImportCommand.__init__(self, 'reset')
        self.ref = ref
        self.from_ = from_


class TagCommand(ImportCommand):
    """A 'tag' command."""

    def __init__(self, id, from_, tagger, message):
        ImportCommand.__init__(self, 'tag')
        self.id = id
        self.from_ = from_
        self.tagger = tagger
        self.message = message


class FileCommand(ImportCommand):
    """Base class for file commands."""
    pass


class FileModifyCommand(FileCommand):
    """A 'filemodify' file command."""

    def __init__(self, path, kind, is_executable, dataref, data):
        # Either dataref or data should be null
        FileCommand.__init__(self, 'filemodify')
        self.path = path
        self.kind = kind
        self.is_executable = is_executable
        self.dataref = dataref
        self.data = data
        self._binary = ['data']


class FileDeleteCommand(FileCommand):
    """A 'filedelete' file command."""

    def __init__(self, path):
        FileCommand.__init__(self, 'filedelete')
        self.path = path


class FileCopyCommand(FileCommand):
    """A 'filecopy' file command."""

    def __init__(self, src_path, dest_path):
        FileCommand.__init__(self, 'filecopy')
        self.src_path = src_path
        self.dest_path = dest_path


class FileRenameCommand(FileCommand):
    """A 'filerename' file command."""

    def __init__(self, old_path, new_path):
        FileCommand.__init__(self, 'filerename')
        self.old_path = old_path
        self.new_path = new_path


class FileDeleteAllCommand(FileCommand):
    """A 'filedeleteall' file command; it carries no fields of its own."""

    def __init__(self):
        FileCommand.__init__(self, 'filedeleteall')
# fastimport/dates.py
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Date parsing routines.

Each routine returns timestamp,timezone where

* timestamp is seconds since epoch
* timezone is the offset from UTC in seconds.
"""


import time


def parse_raw(s):
    """Parse a date from a raw string.

    The format must be exactly "seconds-since-epoch offset-utc".
    See the spec for details.

    :return: (timestamp, timezone) - the timestamp as a float and the
        timezone offset in seconds
    :raises ValueError: if the string does not have that shape
    """
    timestamp_str, timezone_str = s.split(' ', 1)
    timestamp = float(timestamp_str)
    timezone = _parse_tz(timezone_str)
    return timestamp, timezone


def _parse_tz(tz):
    """Parse a timezone specification in the [+|-]HHMM format.

    :return: the timezone offset in seconds.
    :raises ValueError: if tz is not a sign followed by four digits
    """
    # from git_repository.py in bzr-git
    # Validate explicitly: an assert would be stripped under "python -O".
    if len(tz) != 5 or tz[0] not in ('+', '-'):
        raise ValueError("invalid timezone %r: expected [+|-]HHMM" % (tz,))
    sign = {'+': +1, '-': -1}[tz[0]]
    hours = int(tz[1:3])
    minutes = int(tz[3:])
    return sign * 60 * (60 * hours + minutes)


def parse_rfc2822(s):
    """Parse a date from a rfc2822 string.

    Not implemented yet: always raises NotImplementedError.
    See the spec for details.
    """
    raise NotImplementedError(parse_rfc2822)


def parse_now(s):
    """Parse a date from a string.

    The format must be exactly "now".
    See the spec for details.

    :return: (current time as given by time.time(), 0); the argument
        itself is not inspected
    """
    return time.time(), 0


# Lookup table of date parsing routines
DATE_PARSERS_BY_NAME = {
    'raw':      parse_raw,
    'rfc2822':  parse_rfc2822,
    'now':      parse_now,
    }
# fastimport/errors.py
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Exception classes for fastimport"""


# Prefix to messages to show location information
_LOCATION_FMT = "line %(lineno)d: "


class ImportError(Exception):
    """The base exception class for all import processing exceptions.

    Subclasses set ``_fmt`` to a %-style template whose named fields are
    filled in from the instance attributes when the error is printed.
    """

    _fmt = "Unknown Import Error"

    def __str__(self):
        """Return ``_fmt`` filled in from this instance's attributes."""
        try:
            return self._fmt % self.__dict__
        except (KeyError, TypeError, ValueError):
            # Message formatting must never hide the error itself.
            return "%s (unprintable: fmt=%r, fields=%r)" % (
                self.__class__.__name__, self._fmt, self.__dict__)


class ParsingError(ImportError):
    """The base exception class for all import parsing exceptions.

    :ivar lineno: the line number the error is reported against
    """

    _fmt = _LOCATION_FMT + "Unknown Import Parsing Error"

    def __init__(self, lineno):
        ImportError.__init__(self)
        self.lineno = lineno


class MissingBytes(ParsingError):
    """Raised when EOF encountered while expecting to find more bytes."""

    _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes,"
        " found %(found)d")

    def __init__(self, lineno, expected, found):
        ParsingError.__init__(self, lineno)
        self.expected = expected
        self.found = found


class MissingTerminator(ParsingError):
    """Raised when EOF encountered while expecting to find a terminator."""

    _fmt = (_LOCATION_FMT +
        "Unexpected EOF - expected '%(terminator)s' terminator")

    def __init__(self, lineno, terminator):
        ParsingError.__init__(self, lineno)
        self.terminator = terminator


class InvalidCommand(ParsingError):
    """Raised when an unknown command found."""

    _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'")

    def __init__(self, lineno, cmd):
        ParsingError.__init__(self, lineno)
        self.cmd = cmd


class MissingSection(ParsingError):
    """Raised when a section is required in a command but not present."""

    _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s")

    def __init__(self, lineno, cmd, section):
        ParsingError.__init__(self, lineno)
        self.cmd = cmd
        self.section = section


class BadFormat(ParsingError):
    """Raised when a section is formatted incorrectly."""

    _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in "
        "command %(cmd)s: found '%(text)s'")

    def __init__(self, lineno, cmd, section, text):
        ParsingError.__init__(self, lineno)
        self.cmd = cmd
        self.section = section
        self.text = text


class InvalidTimezone(ParsingError):
    """Raised when converting a string timezone to a seconds offset."""

    _fmt = (_LOCATION_FMT +
        "Timezone %(timezone)r could not be converted.%(reason)s")

    def __init__(self, lineno, timezone, reason=None):
        ParsingError.__init__(self, lineno)
        self.timezone = timezone
        # The reason is stored with a leading space so that it can be
        # appended directly after the fixed part of the message.
        if reason:
            self.reason = ' ' + reason
        else:
            self.reason = ''


class UnknownDateFormat(ImportError):
    """Raised when an unknown date format is given."""

    _fmt = ("Unknown date format '%(format)s'")

    def __init__(self, format):
        ImportError.__init__(self)
        self.format = format


class MissingHandler(ImportError):
    """Raised when a processor can't handle a command."""

    _fmt = ("Missing handler for command %(cmd)s")

    def __init__(self, cmd):
        ImportError.__init__(self)
        self.cmd = cmd


class UnknownParameter(ImportError):
    """Raised when an unknown parameter is passed to a processor."""

    _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")

    def __init__(self, param, knowns):
        ImportError.__init__(self)
        self.param = param
        self.knowns = knowns


class BadRepositorySize(ImportError):
    """Raised when the repository has an incorrect number of revisions."""

    _fmt = ("Bad repository size - %(found)d revisions found, "
        "%(expected)d expected")

    def __init__(self, expected, found):
        ImportError.__init__(self)
        self.expected = expected
        self.found = found


class BadRestart(ImportError):
    """Raised when the import stream and id-map do not match up."""

    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
        "but matching revision-id is unknown")

    def __init__(self, commit_id):
        ImportError.__init__(self)
        self.commit_id = commit_id
# fastimport/hgechoprocessor.py
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Import processor that echoes each command it receives.

Every handler writes the repr() of its command to the ``ui`` object; nothing
is stored in the repository.
"""


import errors
import processor


class HgEchoProcessor(processor.ImportProcessor):
    """Processor whose handlers write each command to ``ui``."""

    def __init__(self, ui, repo, **opts):
        """
        :param ui: object whose ``write`` method receives the output
        :param repo: kept on the instance and handed to the commit handler
        :param opts: extra options, handed to the commit handler
        """
        self.ui = ui
        self.repo = repo
        self.opts = opts
        # Flag polled by ImportProcessor._process() to stop early.
        self.finished = False

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.ui.write("Cmd: %s\n" % repr(cmd))

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.ui.write("Cmd: %s\n" % repr(cmd))

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.ui.write("Cmd: %s\n" % repr(cmd))

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit's file commands are echoed by a HgEchoCommitHandler.
        """
        self.ui.write("Commit: %s\n" % repr(cmd))
        commit_handler = HgEchoCommitHandler(cmd, self.ui, self.repo,
            **self.opts)
        commit_handler.process()
        self.ui.write("Done commit\n")

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.ui.write("Cmd: %s\n" % repr(cmd))

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.ui.write("Cmd: %s\n" % repr(cmd))

    # NOTE(review): __init__ sets an instance attribute with the same name,
    # so this method is shadowed and cannot be reached through an instance.
    # Kept for compatibility; decide whether it should become a hook.
    def finished(self):
        self.ui.write("Finished")

    def pre_handler(self, cmd):
        """Echo the command before its handler runs."""
        self.ui.write("Pre-handler: %s\n" % repr(cmd))

    def post_handler(self, cmd):
        """Echo the command after its handler has run."""
        self.ui.write("Post-handler: %s\n" % repr(cmd))


class HgEchoCommitHandler(processor.CommitHandler):
    """Commit handler whose file-command handlers write to ``ui``."""

    def __init__(self, command, ui, repo, **opts):
        self.command = command
        self.ui = ui
        self.repo = repo
        self.opts = opts

    def process(self):
        """Dispatch every file command of the commit to its handler.

        :raises errors.MissingHandler: if this class defines no handler
            for a file command
        """
        self.pre_process_files()
        for fc in self.command.file_iter():
            try:
                # fc.name[4:] drops the first four characters of the
                # command name ('filemodify' -> 'modify').  Only handlers
                # defined directly on this class are found.
                handler = self.__class__.__dict__[fc.name[4:] + "_handler"]
            except KeyError:
                raise errors.MissingHandler(fc.name)
            else:
                handler(self, fc)
        self.post_process_files()

    def pre_process_files(self):
        """Prepare for committing."""
        pass

    def post_process_files(self):
        """Save the revision."""
        pass

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        self.ui.write("Cmd: %s\n" % repr(filecmd))

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        self.ui.write("Cmd: %s\n" % repr(filecmd))

    def copy_handler(self, filecmd):
        """Handle a filecopy command."""
        self.ui.write("Cmd: %s\n" % repr(filecmd))

    def rename_handler(self, filecmd):
        """Handle a filerename command."""
        self.ui.write("Cmd: %s\n" % repr(filecmd))

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        self.ui.write("Cmd: %s\n" % repr(filecmd))
# fastimport/parser.py
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

r"""Parser of import data into command objects.

In order to reuse existing front-ends, the stream format is a subset of
the one used by git-fast-import (as of the 1.5.4 release of git at least).
The grammar is:

  stream ::= cmd*;

  cmd ::= new_blob
        | new_commit
        | new_tag
        | reset_branch
        | checkpoint
        | progress
        ;

  new_blob ::= 'blob' lf
    mark?
    file_content;
  file_content ::= data;

  new_commit ::= 'commit' sp ref_str lf
    mark?
    ('author' sp name '<' email '>' when lf)?
    'committer' sp name '<' email '>' when lf
    commit_msg
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
    file_change*
    lf?;
  commit_msg ::= data;

  file_change ::= file_clr
    | file_del
    | file_rnm
    | file_cpy
    | file_obm
    | file_inm;
  file_clr ::= 'deleteall' lf;
  file_del ::= 'D' sp path_str lf;
  file_rnm ::= 'R' sp path_str sp path_str lf;
  file_cpy ::= 'C' sp path_str sp path_str lf;
  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
    data;

  new_tag ::= 'tag' sp tag_str lf
    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
    'tagger' sp name '<' email '>' when lf
    tag_msg;
  tag_msg ::= data;

  reset_branch ::= 'reset' sp ref_str lf
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
    lf?;

  checkpoint ::= 'checkpoint' lf
    lf?;

  progress ::= 'progress' sp not_lf* lf
    lf?;

     # note: the first idnum in a stream should be 1 and subsequent
     # idnums should not have gaps between values as this will cause
     # the stream parser to reserve space for the gapped values.  An
     # idnum can be updated in the future to a new object by issuing
     # a new mark directive with the old idnum.
     #
  mark ::= 'mark' sp idnum lf;
  data ::= (delimited_data | exact_data)
    lf?;

    # note: delim may be any string but must not contain lf.
    # data_line may contain any data but must not be exactly
    # delim.
  delimited_data ::= 'data' sp '<<' delim lf
    (data_line lf)*
    delim lf;

     # note: declen indicates the length of binary_data in bytes.
     # declen does not include the lf preceding the binary data.
     #
  exact_data ::= 'data' sp declen lf
    binary_data;

     # note: quoted strings are C-style quoting supporting \c for
     # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
     # is the signed byte value in octal.  Note that the only
     # characters which must actually be escaped to protect the
     # stream formatting is: \, " and LF.  Otherwise these values
     # are UTF8.
     #
  ref_str     ::= ref;
  sha1exp_str ::= sha1exp;
  tag_str     ::= tag;
  path_str    ::= path    | '"' quoted(path)    '"' ;
  mode        ::= '100644' | '644'
                | '100755' | '755'
                | '120000'
                ;

  declen ::= # unsigned 32 bit value, ascii base10 notation;
  bigint ::= # unsigned integer value, ascii base10 notation;
  binary_data ::= # file content, not interpreted;

  when         ::= raw_when | rfc2822_when;
  raw_when     ::= ts sp tz;
  rfc2822_when ::= # Valid RFC 2822 date and time;

  sp ::= # ASCII space character;
  lf ::= # ASCII newline (LF) character;

     # note: a colon (':') must precede the numerical value assigned to
     # an idnum.  This is to distinguish it from a ref or tag name as
     # GIT does not permit ':' in ref or tag strings.
     #
  idnum   ::= ':' bigint;
  path    ::= # GIT style file path, e.g. "a/b/c";
  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
  sha1exp ::= # Any valid GIT SHA1 expression;
  hexsha1 ::= # SHA1 in hexadecimal format;

     # note: name and email are UTF8 strings, however name must not
     # contain '<' or lf and email must not contain any of the
     # following: '<', '>', lf.
     #
  name  ::= # valid GIT author/committer name;
  email ::= # valid GIT author/committer email;
  ts    ::= # time since the epoch in seconds, ascii base10 notation;
  tz    ::= # GIT style timezone;

     # note: comments may appear anywhere in the input, except
     # within a data command.  Any form of the data command
     # always escapes the related input from comment processing.
     #
     # In case it is not clear, the '#' that starts the comment
     # must be the first character on the line (an lf must have
     # preceded it).
     #
  comment ::= '#' not_lf* lf;
  not_lf  ::= # Any byte that is not ASCII newline (LF);
"""


import re
import sys

import commands
import dates
import errors


## Stream parsing ##

class LineBasedParser(object):

    def __init__(self, input):
        """A Parser that keeps track of line numbers.

        :param input: the file-like object to read from
        """
        self.input = input
        self.lineno = 0
        # Lines pushed back onto the input stream
        self._buffer = []

    def abort(self, exception, *args):
        """Raise an exception providing line number information."""
        raise exception(self.lineno, *args)

    def readline(self):
        """Get the next line including the newline or '' on EOF."""
        self.lineno += 1
        if self._buffer:
            return self._buffer.pop()
        else:
            return self.input.readline()

    def next_line(self):
        """Get the next line without the newline or None on EOF."""
        line = self.readline()
        if not line:
            return None
        # Only strip a newline that is really there: the last line of the
        # input may not be terminated.
        if line.endswith('\n'):
            return line[:-1]
        return line

    def push_line(self, line):
        """Push line back onto the line buffer.

        :param line: the line with no trailing newline
        """
        self.lineno -= 1
        self._buffer.append(line + "\n")

    def read_bytes(self, count):
        """Read a given number of bytes from the input stream.

        Throws MissingBytes if the bytes are not found.

        Note: This method does not read from the line buffer.

        :return: a string
        """
        lines = []
        left = count
        found = 0
        while left > 0:
            line = self.input.readline(left)
            if line:
                line_len = len(line)
                left -= line_len
                found += line_len
                lines.append(line)
                # Keep the line counter in step with the data consumed.
                if line.endswith('\n'):
                    self.lineno += 1
            else:
                # EOF: stop looping; the shortfall is reported below.
                left = 0
        if found != count:
            self.abort(errors.MissingBytes, count, found)
        return ''.join(lines)

    def read_until(self, terminator):
        """Read the input stream until the terminator is found.

        Throws MissingTerminator if the terminator is not found.

        Note: This method does not read from the line buffer.
        Not implemented yet: always raises NotImplementedError.

        :return: the bytes read up to but excluding the terminator.
        """
        raise NotImplementedError(self.read_until)


# Regular expression used for parsing. (Note: The spec states that the name
# part should be non-empty but git-fast-export doesn't always do that so
# the name group may match nothing.)  The name is everything before " <",
# so names made of several words are kept whole.
_WHO_AND_WHEN_RE = re.compile(r'([^<]*) <(.+)> (.+)')


class ImportParser(LineBasedParser):

    def __init__(self, input, verbose=False, output=sys.stdout):
        """A Parser of import commands.

        :param input: the file-like object to read from
        :param verbose: display extra information or not
        :param output: the file-like object to write messages to (YAGNI?)
        """
        LineBasedParser.__init__(self, input)
        self.verbose = verbose
        self.output = output
        # We auto-detect the date format when a date is first encountered
        self.date_parser = None
        self.last_mark = None

    def iter_commands(self):
        """Iterator returning ImportCommand objects.

        Blank lines and lines starting with '#' are skipped.  Aborts with
        InvalidCommand on a line that starts no known command.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for commands in order of likelihood
            elif line.startswith('commit '):
                yield self._parse_commit(line[len('commit '):])
            elif line.startswith('blob'):
                yield self._parse_blob()
            elif line.startswith('progress '):
                yield commands.ProgressCommand(line[len('progress '):])
            elif line.startswith('reset '):
                yield self._parse_reset(line[len('reset '):])
            elif line.startswith('tag '):
                yield self._parse_tag(line[len('tag '):])
            elif line.startswith('checkpoint'):
                yield commands.CheckpointCommand()
            else:
                self.abort(errors.InvalidCommand, line)

    def iter_file_commands(self):
        """Iterator returning FileCommand objects.

        If an invalid file command is found, the line is silently
        pushed back and iteration ends.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for file commands in order of likelihood
            elif line.startswith('M '):
                yield self._parse_file_modify(line[2:])
            elif line.startswith('D '):
                path = self._path(line[2:])
                yield commands.FileDeleteCommand(path)
            elif line.startswith('R '):
                old, new = self._path_pair(line[2:])
                yield commands.FileRenameCommand(old, new)
            elif line.startswith('C '):
                src, dest = self._path_pair(line[2:])
                yield commands.FileCopyCommand(src, dest)
            elif line.startswith('deleteall'):
                yield commands.FileDeleteAllCommand()
            else:
                self.push_line(line)
                break

    def _parse_blob(self):
        """Parse a blob command."""
        lineno = self.lineno
        mark = self._get_mark_if_any()
        data = self._get_data('blob')
        return commands.BlobCommand(mark, data, lineno)

    def _parse_commit(self, ref):
        """Parse a commit command.

        The file commands are not read here: the returned command is given
        iter_file_commands, so they are parsed when the caller iterates.
        """
        lineno = self.lineno
        mark = self._get_mark_if_any()
        author = self._get_user_info('commit', 'author', False)
        committer = self._get_user_info('commit', 'committer')
        message = self._get_data('commit', 'message')
        from_ = self._get_from()
        if from_ is None:
            # No explicit parent: fall back to the previous commit's mark.
            # NOTE(review): last_mark holds the bare mark text (no ':'
            # prefix), unlike the value read from a 'from' line - confirm
            # that consumers of ``parents`` expect that.
            from_ = self.last_mark
        self.last_mark = mark
        if from_ is not None:
            parents = [from_]
            while True:
                merge = self._get_merge()
                if merge is not None:
                    parents.append(merge)
                else:
                    break
        else:
            parents = []
        return commands.CommitCommand(ref, mark, author, committer, message,
            parents, self.iter_file_commands, lineno)

    def _parse_file_modify(self, info):
        """Parse a filemodify command within a commit.

        :param info: a string in the format "mode dataref path"
          (where dataref might be the hard-coded literal 'inline').
        """
        params = info.split(' ', 2)
        path = self._path(params[2])
        is_executable, is_symlink = self._mode(params[0])
        if is_symlink:
            kind = commands.SYMLINK_KIND
        else:
            kind = commands.FILE_KIND
        if params[1] == 'inline':
            dataref = None
            data = self._get_data('filemodify')
        else:
            dataref = params[1]
            data = None
        return commands.FileModifyCommand(path, kind, is_executable, dataref,
            data)

    def _parse_reset(self, ref):
        """Parse a reset command."""
        from_ = self._get_from()
        return commands.ResetCommand(ref, from_)

    def _parse_tag(self, name):
        """Parse a tag command."""
        from_ = self._get_from('tag')
        tagger = self._get_user_info('tag', 'tagger')
        message = self._get_data('tag', 'message')
        return commands.TagCommand(name, from_, tagger, message)

    def _get_mark_if_any(self):
        """Parse a mark section.

        :return: the text after 'mark :', or None if the next line is not
            a mark (the line is pushed back) or the input has ended
        """
        line = self.next_line()
        if line is None:
            # EOF: there is nothing to push back.
            return None
        if line.startswith('mark :'):
            return line[len('mark :'):]
        self.push_line(line)
        return None

    def _get_from(self, required_for=None):
        """Parse a from section.

        :param required_for: name of the command that requires the
            section; when set, a missing section aborts with MissingSection
        :return: the text after 'from ', or None if the section is absent
        """
        line = self.next_line()
        if line is not None and line.startswith('from '):
            return line[len('from '):]
        elif required_for:
            self.abort(errors.MissingSection, required_for, 'from')
        elif line is not None:
            self.push_line(line)
        return None

    def _get_merge(self):
        """Parse a merge section.

        :return: the text after 'merge ', or None if the next line is not
            a merge (the line is pushed back) or the input has ended
        """
        line = self.next_line()
        if line is None:
            return None
        if line.startswith('merge '):
            return line[len('merge '):]
        self.push_line(line)
        return None

    def _get_user_info(self, cmd, section, required=True):
        """Parse a user section.

        :return: the tuple produced by _who_when, or None if the section
            is absent and not required
        """
        line = self.next_line()
        if line is not None and line.startswith(section + ' '):
            return self._who_when(line[len(section + ' '):], cmd, section)
        elif required:
            self.abort(errors.MissingSection, cmd, section)
        elif line is not None:
            self.push_line(line)
        return None

    def _get_data(self, required_for, section='data'):
        """Parse a data section.

        :return: the data read
        """
        line = self.next_line()
        if line is None or not line.startswith('data '):
            self.abort(errors.MissingSection, required_for, section)
        rest = line[len('data '):]
        if rest.startswith('<<'):
            return self.read_until(rest[2:])
        size = int(rest)
        res = self.read_bytes(size)
        # consume extra LF if present
        line = self.next_line()
        if line is not None and line != '':
            self.push_line(line)
        return res

    def _who_when(self, s, cmd, section):
        """Parse who and when information from a string.

        :return: a tuple of (name,email,timestamp,timezone)
        """
        match = _WHO_AND_WHEN_RE.search(s)
        if match:
            datestr = match.group(3)
            if self.date_parser is None:
                # auto-detect the date format
                if len(datestr.split(' ')) == 2:
                    format = 'raw'
                elif datestr == 'now':
                    format = 'now'
                else:
                    format = 'rfc2822'
                self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
            when = self.date_parser(datestr)
            return (match.group(1), match.group(2), when[0], when[1])
        else:
            self.abort(errors.BadFormat, cmd, section, s)

    def _path(self, s):
        """Parse a path.

        A path starting with '"' must also end with one; the quotes are
        removed and the escapes inside them decoded.
        """
        if s.startswith('"'):
            if s[-1] != '"':
                # The command and section are not known at this point.
                self.abort(errors.BadFormat, '?', '?', s)
            else:
                return _unquote_c_string(s[1:-1])
        return s

    def _path_pair(self, s):
        """Parse two paths separated by a space."""
        # TODO: handle a space in the first path
        parts = s.split(' ', 1)
        return [_unquote_c_string(part) for part in parts]

    def _mode(self, s):
        """Parse a file mode into executable and symlink flags.

        :return (is_executable, is_symlink)
        """
        # Note: Output from git-fast-export slightly different to spec
        if s in ['644', '100644', '0100644']:
            return False, False
        elif s in ['755', '100755', '0100755']:
            return True, False
        elif s in ['120000', '0120000']:
            return False, True
        else:
            self.abort(errors.BadFormat, 'filemodify', 'mode', s)


def _unquote_c_string(s):
    r"""replace C-style escape sequences (\n, \", etc.) with real chars."""
    # HACK: Python strings are close enough
    return s.decode('string_escape', 'replace')
# fastimport/processor.py
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Processor of import commands.

This module provides core processing functionality including an abstract class
for basing real processors on. See the processors package for examples.
"""


import errors


class ImportProcessor(object):
    """Base class for import processors.

    Subclasses should override the pre_*, post_* and *_handler
    methods as appropriate.
    """

    # Polled after every command; a true value ends processing early.
    # Defined here so that subclasses need not set it themselves.
    finished = False

    def process(self, command_iter):
        """Process the stream of commands.

        teardown() is always run, even when processing fails.

        :param command_iter: a callable returning an iterator of commands
        """
        self.setup()
        try:
            self._process(command_iter)
        finally:
            self.teardown()

    def _process(self, command_iter):
        """Dispatch each command to the handler named ``<name>_handler``.

        :raises errors.MissingHandler: if the concrete class defines no
            handler for a command
        """
        self.pre_process()
        for cmd in command_iter():
            try:
                # Only handlers defined directly on the concrete class are
                # found; the defaults inherited from this base class are
                # not consulted.
                handler = self.__class__.__dict__[cmd.name + "_handler"]
            except KeyError:
                raise errors.MissingHandler(cmd.name)
            else:
                self.pre_handler(cmd)
                handler(self, cmd)
                self.post_handler(cmd)
            if self.finished:
                break
        self.post_process()

    def setup(self):
        """Hook run before processing starts."""
        pass

    def teardown(self):
        """Hook run after processing ends, whether or not it succeeded."""
        pass

    def pre_process(self):
        """Hook for logic at start of processing."""
        pass

    def post_process(self):
        """Hook for logic at end of processing."""
        pass

    def pre_handler(self, cmd):
        """Hook for logic before each handler starts."""
        pass

    def post_handler(self, cmd):
        """Hook for logic after each handler finishes."""
        pass

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        raise NotImplementedError(self.progress_handler)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        raise NotImplementedError(self.blob_handler)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        raise NotImplementedError(self.checkpoint_handler)

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        raise NotImplementedError(self.commit_handler)

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        raise NotImplementedError(self.reset_handler)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        raise NotImplementedError(self.tag_handler)


class CommitHandler(object):
    """Base class for commit handling.

    Subclasses should override the pre_*, post_* and *_handler
    methods as appropriate.
    """

    def __init__(self, command):
        self.command = command

    def process(self):
        """Dispatch every file command of the commit to its handler.

        :raises errors.MissingHandler: if the concrete class defines no
            handler for a file command
        """
        self.pre_process_files()
        for fc in self.command.file_iter():
            try:
                # fc.name[4:] drops the first four characters of the
                # command name ('filemodify' -> 'modify').
                handler = self.__class__.__dict__[fc.name[4:] + "_handler"]
            except KeyError:
                raise errors.MissingHandler(fc.name)
            else:
                handler(self, fc)
        self.post_process_files()

    def pre_process_files(self):
        """Prepare for committing."""
        pass

    def post_process_files(self):
        """Save the revision."""
        pass

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        raise NotImplementedError(self.modify_handler)

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        raise NotImplementedError(self.delete_handler)

    def copy_handler(self, filecmd):
        """Handle a filecopy command."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Handle a filerename command."""
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        raise NotImplementedError(self.deleteall_handler)
