# HG changeset patch # User Greg Ward # Date 1241537333 14400 # Node ID 08e2157aaa9ad9ba4c8b797a604bf44fe3c52c90 # Parent 31b9a5805f02455556616c5d1427b49c24b483b3 Remove local fork of bzr-fastimport; use my fastimport library instead. diff -r 31b9a5805f02 -r 08e2157aaa9a hgfastimport/__init__.py --- a/hgfastimport/__init__.py Tue May 05 10:27:27 2009 -0400 +++ b/hgfastimport/__init__.py Tue May 05 11:28:53 2009 -0400 @@ -1,6 +1,6 @@ from mercurial import commands -import parser +from fastimport import parser import hgechoprocessor import hgimport @@ -13,7 +13,7 @@ ui.write("Reading source: %s\n" % source) f = open(source) p = parser.ImportParser(f) - proc._process(p.iter_commands) + proc.process(p.iter_commands) f.close() finally: proc.teardown() diff -r 31b9a5805f02 -r 08e2157aaa9a hgfastimport/commands.py --- a/hgfastimport/commands.py Tue May 05 10:27:27 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,216 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Import command classes.""" - - -# Lists of command names -COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'progress', 'reset', 'tag'] -FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename', - 'filedeleteall'] - -# Bazaar file kinds -FILE_KIND = 'file' -SYMLINK_KIND = 'symlink' - - -class ImportCommand(object): - """Base class for import commands.""" - - def __init__(self, name): - self.name = name - # List of field names not to display - self._binary = [] - - def __repr__(self): - return "<%s at %x: %s>" % (self.__class__.__name__, id(self), self) - - def __str__(self): - return self.name - - def dump_str(self, names=None, child_lists=None, verbose=False): - """Dump fields as a string. - - :param names: the list of fields to include or - None for all public fields - :param child_lists: dictionary of child command names to - fields for that child command to include - :param verbose: if True, prefix each line with the command class and - display fields as a dictionary; if False, dump just the field - values with tabs between them - """ - interesting = {} - if names is None: - fields = [k for k in self.__dict__.keys() if not k.startswith('_')] - else: - fields = names - for field in fields: - value = self.__dict__.get(field) - if field in self._binary and value is not None: - value = '(...)' - interesting[field] = value - if verbose: - return "%s: %s" % (self.__class__.__name__, interesting) - else: - return "\t".join([str(interesting[k]) for k in fields]) - - -class BlobCommand(ImportCommand): - - def __init__(self, mark, data, lineno=0): - ImportCommand.__init__(self, 'blob') - self.mark = mark - self.data = data - self.lineno = lineno - # Provide a unique id in case the mark is missing - if mark is None: - self.id = '@%d' % lineno - else: - self.id = ':' + mark - self._binary = ['data'] - - def __str__(self): - return self.id - - -class CheckpointCommand(ImportCommand): - - def __init__(self): - ImportCommand.__init__(self, 'checkpoint') - - -class CommitCommand(ImportCommand): - - def __init__(self, ref, mark, author, committer, message, from_, - parents, file_iter, lineno=0): - ImportCommand.__init__(self, 'commit') - self.ref = ref - self.mark = mark - self.author = author - self.committer = committer - self.message = message - self.from_ = from_ - self.parents = parents - self.file_iter = file_iter - self.lineno = lineno - self._binary = ['file_iter'] - # Provide a unique id in case the mark is missing - if mark is None: - self.id = '@%d' % lineno - else: - self.id = ':' + mark - - def __str__(self): - return "ref %s, mark %s" % (self.ref, self.mark) - - def dump_str(self, names=None, child_lists=None, verbose=False): - result = [ImportCommand.dump_str(self, names, verbose=verbose)] - for f in self.file_iter(): - if child_lists is None: - continue - try: - child_names = child_lists[f.name] - except KeyError: - continue - result.append("\t%s" % f.dump_str(child_names, verbose=verbose)) - return '\n'.join(result) - - -class ProgressCommand(ImportCommand): - - def __init__(self, message): - ImportCommand.__init__(self, 'progress') - self.message = message - - -class ResetCommand(ImportCommand): - - def __init__(self, ref, from_): - ImportCommand.__init__(self, 'reset') - self.ref = ref - self.from_ = from_ - - -class TagCommand(ImportCommand): - - def __init__(self, id, from_, tagger, message): - ImportCommand.__init__(self, 'tag') - self.id = id - self.from_ = from_ - self.tagger = tagger - self.message = message - - def __str__(self): - return self.id - - -class FileCommand(ImportCommand): - """Base class for file commands.""" - pass - - -class FileModifyCommand(FileCommand): - - def __init__(self, path, kind, is_executable, dataref, data): - # Either dataref or data should be null - FileCommand.__init__(self, 'filemodify') - self.path = path - self.kind = kind - self.is_executable = is_executable - self.dataref = dataref - self.data = data - self._binary = ['data'] - - def __str__(self): - return self.path - - -class FileDeleteCommand(FileCommand): - - def __init__(self, path): - FileCommand.__init__(self, 'filedelete') - self.path = path - - def __str__(self): - return self.path - - -class FileCopyCommand(FileCommand): - - def __init__(self, src_path, dest_path): - FileCommand.__init__(self, 'filecopy') - self.src_path = src_path - self.dest_path = dest_path - - def __str__(self): - return "%s -> %s" % (self.src_path, self.dest_path) - - -class FileRenameCommand(FileCommand): - - def __init__(self, old_path, new_path): - FileCommand.__init__(self, 'filerename') - self.old_path = old_path - self.new_path = new_path - - def __str__(self): - return "%s -> %s" % (self.old_path, self.new_path) - - -class FileDeleteAllCommand(FileCommand): - - def __init__(self): - FileCommand.__init__(self, 'filedeleteall') diff -r 31b9a5805f02 -r 08e2157aaa9a hgfastimport/dates.py --- a/hgfastimport/dates.py Tue May 05 10:27:27 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Date parsing routines. - -Each routine returns timestamp,timezone where - -* timestamp is seconds since epoch -* timezone is the offset from UTC in seconds. -""" - - -import time - - -def parse_raw(s): - """Parse a date from a raw string. - - The format must be exactly "seconds-since-epoch offset-utc". - See the spec for details. - """ - timestamp_str, timezone_str = s.split(' ', 1) - timestamp = float(timestamp_str) - timezone = _parse_tz(timezone_str) - return timestamp, timezone - - -def _parse_tz(tz): - """Parse a timezone specification in the [+|-]HHMM format. - - :return: the timezone offset in seconds. - """ - # from git_repository.py in bzr-git - assert len(tz) == 5 - sign = {'+': +1, '-': -1}[tz[0]] - hours = int(tz[1:3]) - minutes = int(tz[3:]) - return sign * 60 * (60 * hours + minutes) - - -def parse_rfc2822(s): - """Parse a date from a rfc2822 string. - - See the spec for details. - """ - raise NotImplementedError(parse_rfc2822) - - -def parse_now(s): - """Parse a date from a string. - - The format must be exactly "now". - See the spec for details. - """ - return time.time(), 0 - - -# Lookup tabel of date parsing routines -DATE_PARSERS_BY_NAME = { - 'raw': parse_raw, - 'rfc2822': parse_rfc2822, - 'now': parse_now, - } diff -r 31b9a5805f02 -r 08e2157aaa9a hgfastimport/errors.py --- a/hgfastimport/errors.py Tue May 05 10:27:27 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,171 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Exception classes for fastimport""" - -#from bzrlib import errors as bzr_errors - - -# Prefix to messages to show location information -_LOCATION_FMT = "line %(lineno)d: " - -class FmtException(StandardError): - def __str__(self): - return repr(self) - - def __repr__(self): - return self._fmt % self.__dict__ - -class ImportError(FmtException): - """The base exception class for all import processing exceptions.""" - - _fmt = "Unknown Import Error" - - -class ParsingError(ImportError): - """The base exception class for all import processing exceptions.""" - - _fmt = _LOCATION_FMT + "Unknown Import Parsing Error" - - def __init__(self, lineno): - ImportError.__init__(self) - self.lineno = lineno - - -class MissingBytes(ParsingError): - """Raised when EOF encountered while expecting to find more bytes.""" - - _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes," - " found %(found)d") - - def __init__(self, lineno, expected, found): - ParsingError.__init__(self, lineno) - self.expected = expected - self.found = found - - -class MissingTerminator(ParsingError): - """Raised when EOF encountered while expecting to find a terminator.""" - - _fmt = (_LOCATION_FMT + - "Unexpected EOF - expected '%(terminator)s' terminator") - - def __init__(self, lineno, terminator): - ParsingError.__init__(self, lineno) - self.terminator = terminator - - -class InvalidCommand(ParsingError): - """Raised when an unknown command found.""" - - _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'") - - def __init__(self, lineno, cmd): - ParsingError.__init__(self, lineno) - self.cmd = cmd - - -class MissingSection(ParsingError): - """Raised when a section is required in a command but not present.""" - - _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s") - - def __init__(self, lineno, cmd, section): - ParsingError.__init__(self, lineno) - self.cmd = cmd - self.section = section - - -class BadFormat(ParsingError): - """Raised when a section is formatted incorrectly.""" - - _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in " - "command %(cmd)s: found '%(text)s'") - - def __init__(self, lineno, cmd, section, text): - ParsingError.__init__(self, lineno) - self.cmd = cmd - self.section = section - self.text = text - - -class InvalidTimezone(ParsingError): - """Raised when converting a string timezone to a seconds offset.""" - - _fmt = (_LOCATION_FMT + - "Timezone %(timezone)r could not be converted.%(reason)s") - - def __init__(self, lineno, timezone, reason=None): - ParsingError.__init__(self, lineno) - self.timezone = timezone - if reason: - self.reason = ' ' + reason - else: - self.reason = '' - - -class UnknownDateFormat(ImportError): - """Raised when an unknown date format is given.""" - - _fmt = ("Unknown date format '%(format)s'") - - def __init__(self, format): - ImportError.__init__(self) - self.format = format - - -class MissingHandler(ImportError): - """Raised when a processor can't handle a command.""" - - _fmt = ("Missing handler for command %(cmd)s") - - def __init__(self, cmd): - ImportError.__init__(self) - self.cmd = cmd - - -class UnknownParameter(ImportError): - """Raised when an unknown parameter is passed to a processor.""" - - _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s") - - def __init__(self, param, knowns): - ImportError.__init__(self) - self.param = param - self.knowns = knowns - - -class BadRepositorySize(ImportError): - """Raised when the repository has an incorrect number of revisions.""" - - _fmt = ("Bad repository size - %(found)d revisions found, " - "%(expected)d expected") - - def __init__(self, expected, found): - ImportError.__init__(self) - self.expected = expected - self.found = found - - -class BadRestart(ImportError): - """Raised when the import stream and id-map do not match up.""" - - _fmt = ("Bad restart - attempted to skip commit %(commit_id)s " - "but matching revision-id is unknown") - - def __init__(self, commit_id): - ImportError.__init__(self) - self.commit_id = commit_id diff -r 31b9a5805f02 -r 08e2157aaa9a hgfastimport/hgechoprocessor.py --- a/hgfastimport/hgechoprocessor.py Tue May 05 10:27:27 2009 -0400 +++ b/hgfastimport/hgechoprocessor.py Tue May 05 11:28:53 2009 -0400 @@ -20,8 +20,7 @@ for basing real processors on. See the processors package for examples. """ - -import processor +from fastimport import processor class HgEchoProcessor(processor.ImportProcessor): diff -r 31b9a5805f02 -r 08e2157aaa9a hgfastimport/hgimport.py --- a/hgfastimport/hgimport.py Tue May 05 10:27:27 2009 -0400 +++ b/hgfastimport/hgimport.py Tue May 05 11:28:53 2009 -0400 @@ -29,9 +29,9 @@ import mercurial.commands from mercurial import util from mercurial.node import nullrev -import processor -import hgechoprocessor +from fastimport import processor +from hgfastimport import hgechoprocessor class HgImportProcessor(processor.ImportProcessor): @@ -49,7 +49,12 @@ self.numblobs = 0 # for progress reporting self.blobdir = None + def setup(self): + """Setup before processing any streams.""" + pass + def teardown(self): + """Cleanup after processing all streams.""" if self.blobdir and os.path.exists(self.blobdir): self.ui.status("Removing blob dir %r ...\n" % self.blobdir) shutil.rmtree(self.blobdir) @@ -104,11 +109,11 @@ # Update to the first parent mercurial.hg.clean(self.repo, self.repo.lookup(first_parent)) #self.ui.write("Bing\n") - if cmd.parents: + if cmd.merges: #self.ui.write("foo") - if len(cmd.parents) > 1: + if len(cmd.merges) > 1: raise NotImplementedError("Can't handle more than two parents") - second_parent = self.committish_rev(cmd.parents[0]) + second_parent = self.committish_rev(cmd.merges[0]) #self.ui.write("Second parent: %s\n" % second_parent) mercurial.commands.debugsetparents(self.ui, self.repo, first_parent, second_parent) @@ -132,10 +137,12 @@ # optional) userinfo = cmd.author or cmd.committer user = "%s <%s>" % (userinfo[0], userinfo[1]) - node = self.repo.rawcommit(files = commit_handler.filelist(), - text = cmd.message, - user = user, - date = self.convert_date(userinfo)) + + # XXX is this the right way to specify filename encoding?!? + files = [f.encode("utf-8") for f in commit_handler.filelist()] + date = self.convert_date(userinfo) + node = self.repo.rawcommit( + files=files, text=cmd.message, user=user, date=date) rev = self.repo.changelog.rev(node) if cmd.mark is not None: self.mark_map[":" + cmd.mark] = rev diff -r 31b9a5805f02 -r 08e2157aaa9a hgfastimport/parser.py --- a/hgfastimport/parser.py Tue May 05 10:27:27 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,507 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Parser of import data into command objects. - -In order to reuse existing front-ends, the stream format is a subset of -the one used by git-fast-import (as of the 1.5.4 release of git at least). -The grammar is: - - stream ::= cmd*; - - cmd ::= new_blob - | new_commit - | new_tag - | reset_branch - | checkpoint - | progress - ; - - new_blob ::= 'blob' lf - mark? - file_content; - file_content ::= data; - - new_commit ::= 'commit' sp ref_str lf - mark? - ('author' sp name '<' email '>' when lf)? - 'committer' sp name '<' email '>' when lf - commit_msg - ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? - ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)* - file_change* - lf?; - commit_msg ::= data; - - file_change ::= file_clr - | file_del - | file_rnm - | file_cpy - | file_obm - | file_inm; - file_clr ::= 'deleteall' lf; - file_del ::= 'D' sp path_str lf; - file_rnm ::= 'R' sp path_str sp path_str lf; - file_cpy ::= 'C' sp path_str sp path_str lf; - file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; - file_inm ::= 'M' sp mode sp 'inline' sp path_str lf - data; - - new_tag ::= 'tag' sp tag_str lf - 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf - 'tagger' sp name '<' email '>' when lf - tag_msg; - tag_msg ::= data; - - reset_branch ::= 'reset' sp ref_str lf - ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? - lf?; - - checkpoint ::= 'checkpoint' lf - lf?; - - progress ::= 'progress' sp not_lf* lf - lf?; - - # note: the first idnum in a stream should be 1 and subsequent - # idnums should not have gaps between values as this will cause - # the stream parser to reserve space for the gapped values. An - # idnum can be updated in the future to a new object by issuing - # a new mark directive with the old idnum. - # - mark ::= 'mark' sp idnum lf; - data ::= (delimited_data | exact_data) - lf?; - - # note: delim may be any string but must not contain lf. - # data_line may contain any data but must not be exactly - # delim. - delimited_data ::= 'data' sp '<<' delim lf - (data_line lf)* - delim lf; - - # note: declen indicates the length of binary_data in bytes. - # declen does not include the lf preceeding the binary data. - # - exact_data ::= 'data' sp declen lf - binary_data; - - # note: quoted strings are C-style quoting supporting \c for - # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn - # is the signed byte value in octal. Note that the only - # characters which must actually be escaped to protect the - # stream formatting is: \, \" and LF. Otherwise these values - # are UTF8. - # - ref_str ::= ref; - sha1exp_str ::= sha1exp; - tag_str ::= tag; - path_str ::= path | '"' quoted(path) '"' ; - mode ::= '100644' | '644' - | '100755' | '755' - | '120000' - ; - - declen ::= # unsigned 32 bit value, ascii base10 notation; - bigint ::= # unsigned integer value, ascii base10 notation; - binary_data ::= # file content, not interpreted; - - when ::= raw_when | rfc2822_when; - raw_when ::= ts sp tz; - rfc2822_when ::= # Valid RFC 2822 date and time; - - sp ::= # ASCII space character; - lf ::= # ASCII newline (LF) character; - - # note: a colon (':') must precede the numerical value assigned to - # an idnum. This is to distinguish it from a ref or tag name as - # GIT does not permit ':' in ref or tag strings. - # - idnum ::= ':' bigint; - path ::= # GIT style file path, e.g. "a/b/c"; - ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT"; - tag ::= # GIT tag name, e.g. "FIREFOX_1_5"; - sha1exp ::= # Any valid GIT SHA1 expression; - hexsha1 ::= # SHA1 in hexadecimal format; - - # note: name and email are UTF8 strings, however name must not - # contain '<' or lf and email must not contain any of the - # following: '<', '>', lf. - # - name ::= # valid GIT author/committer name; - email ::= # valid GIT author/committer email; - ts ::= # time since the epoch in seconds, ascii base10 notation; - tz ::= # GIT style timezone; - - # note: comments may appear anywhere in the input, except - # within a data command. Any form of the data command - # always escapes the related input from comment processing. - # - # In case it is not clear, the '#' that starts the comment - # must be the first character on that the line (an lf have - # preceeded it). - # - comment ::= '#' not_lf* lf; - not_lf ::= # Any byte that is not ASCII newline (LF); -""" - - -import re -import sys - -import commands -import dates -import errors - - -## Stream parsing ## - -class LineBasedParser(object): - - def __init__(self, input): - """A Parser that keeps track of line numbers. - - :param input: the file-like object to read from - """ - self.input = input - self.lineno = 0 - # Lines pushed back onto the input stream - self._buffer = [] - - def abort(self, exception, *args): - """Raise an exception providing line number information.""" - raise exception(self.lineno, *args) - - def readline(self): - """Get the next line including the newline or '' on EOF.""" - self.lineno += 1 - if self._buffer: - return self._buffer.pop() - else: - return self.input.readline() - - def next_line(self): - """Get the next line without the newline or None on EOF.""" - line = self.readline() - if line: - return line[:-1] - else: - return None - - def push_line(self, line): - """Push line back onto the line buffer. - - :param line: the line with no trailing newline - """ - self.lineno -= 1 - self._buffer.append(line + "\n") - - def read_bytes(self, count): - """Read a given number of bytes from the input stream. - - Throws MissingBytes if the bytes are not found. - - Note: This method does not read from the line buffer. - - :return: a string - """ - lines = [] - left = count - found = 0 - while left > 0: - line = self.input.readline(left) - if line: - line_len = len(line) - left -= line_len - found += line_len - lines.append(line) - if line.endswith('\n'): - self.lineno += 1 - else: - left = 0 - if found != count: - self.abort(errors.MissingBytes, count, found) - return ''.join(lines) - - def read_until(self, terminator): - """Read the input stream until the terminator is found. - - Throws MissingTerminator if the terminator is not found. - - Note: This method does not read from the line buffer. - - :return: the bytes read up to but excluding the terminator. - """ - raise NotImplementedError(self.read_until) - - -# Regular expression used for parsing. (Note: The spec states that the name -# part should be non-empty, but git-fast-export doesn't always do that.) -_WHO_AND_WHEN_RE = re.compile(r'([^\<\n]+) <([^\>\n]+)> (.+)') - - -class ImportParser(LineBasedParser): - - def __init__(self, input, verbose=False, output=sys.stdout): - """A Parser of import commands. - - :param input: the file-like object to read from - :param verbose: display extra information of not - :param output: the file-like object to write messages to (YAGNI?) - """ - LineBasedParser.__init__(self, input) - self.verbose = verbose - self.output = output - # We auto-detect the date format when a date is first encountered - self.date_parser = None - - def iter_commands(self): - """Iterator returning ImportCommand objects.""" - while True: - line = self.next_line() - if line is None: - break - elif len(line) == 0 or line.startswith('#'): - continue - # Search for commands in order of likelihood - elif line.startswith('commit '): - yield self._parse_commit(line[len('commit '):]) - elif line.startswith('blob'): - yield self._parse_blob() - elif line.startswith('progress '): - yield commands.ProgressCommand(line[len('progress '):]) - elif line.startswith('reset '): - yield self._parse_reset(line[len('reset '):]) - elif line.startswith('tag '): - yield self._parse_tag(line[len('tag '):]) - elif line.startswith('checkpoint'): - yield commands.CheckpointCommand() - else: - print line - self.abort(errors.InvalidCommand, line) - - def iter_file_commands(self): - """Iterator returning FileCommand objects. - - If an invalid file command is found, the line is silently - pushed back and iteration ends. - """ - while True: - line = self.next_line() - if line is None: - break - elif len(line) == 0 or line.startswith('#'): - continue - # Search for file commands in order of likelihood - elif line.startswith('M '): - yield self._parse_file_modify(line[2:]) - elif line.startswith('D '): - path = self._path(line[2:]) - yield commands.FileDeleteCommand(path) - elif line.startswith('R '): - old, new = self._path_pair(line[2:]) - yield commands.FileRenameCommand(old, new) - elif line.startswith('C '): - src, dest = self._path_pair(line[2:]) - yield commands.FileRenameCommand(src, dest) - elif line.startswith('deleteall'): - yield commands.FileDeleteAllCommand() - else: - self.push_line(line) - break - - def _parse_blob(self): - """Parse a blob command.""" - lineno = self.lineno - mark = self._get_mark_if_any() - data = self._get_data('blob') - return commands.BlobCommand(mark, data, lineno) - - def _parse_commit(self, ref): - """Parse a commit command.""" - lineno = self.lineno - mark = self._get_mark_if_any() - author = self._get_user_info('commit', 'author', False) - committer = self._get_user_info('commit', 'committer') - message = self._get_data('commit', 'message') - from_ = self._get_from() - parents = [] - while True: - merge = self._get_merge() - if merge is not None: - parents.append(merge) - else: - break - return commands.CommitCommand(ref, mark, author, committer, message, from_, - parents, self.iter_file_commands, lineno) - - def _parse_file_modify(self, info): - """Parse a filemodify command within a commit. - - :param info: a string in the format "mode dataref path" - (where dataref might be the hard-coded literal 'inline'). - """ - params = info.split(' ', 2) - path = self._path(params[2]) - is_executable, is_symlink = self._mode(params[0]) - if is_symlink: - kind = commands.SYMLINK_KIND - else: - kind = commands.FILE_KIND - if params[1] == 'inline': - dataref = None - data = self._get_data('filemodify') - else: - dataref = params[1] - data = None - return commands.FileModifyCommand(path, kind, is_executable, dataref, - data) - - def _parse_reset(self, ref): - """Parse a reset command.""" - from_ = self._get_from() - return commands.ResetCommand(ref, from_) - - def _parse_tag(self, name): - """Parse a tag command.""" - from_ = self._get_from('tag') - tagger = self._get_user_info('tag', 'tagger') - message = self._get_data('tag', 'message') - return commands.TagCommand(name, from_, tagger, message) - - def _get_mark_if_any(self): - """Parse a mark section.""" - line = self.next_line() - if line.startswith('mark :'): - return line[len('mark :'):] - else: - self.push_line(line) - return None - - def _get_from(self, required_for=None): - """Parse a from section.""" - line = self.next_line() - if line.startswith('from '): - return line[len('from '):] - elif required_for: - self.abort(errors.MissingSection, required_for, 'from') - else: - self.push_line(line) - return None - - def _get_merge(self): - """Parse a merge section.""" - line = self.next_line() - if line is None: # EOF after last "merge" line - return None - elif line.startswith('merge '): - return line[len('merge '):] - else: - #print "not a merge:", line - self.push_line(line) - return None - - def _get_user_info(self, cmd, section, required=True): - """Parse a user section.""" - line = self.next_line() - if line.startswith(section + ' '): - return self._who_when(line[len(section + ' '):], cmd, section) - elif required: - self.abort(errors.MissingSection, cmd, section) - else: - self.push_line(line) - return None - - def _get_data(self, required_for, section='data'): - """Parse a data section.""" - line = self.next_line() - if line.startswith('data '): - rest = line[len('data '):] - if rest.startswith('<<'): - return self.read_until(rest[2:]) - else: - size = int(rest) - result = self.read_bytes(size) - # optional LF after data. - next = self.input.readline() - self.lineno += 1 - if len(next) > 1 or next != "\n": - self.push_line(next[:-1]) - return result - else: - self.abort(errors.MissingSection, required_for, section) - - def _who_when(self, s, cmd, section): - """Parse who and when information from a string. - - :return: a tuple of (name,email,timestamp,timezone) - """ - match = _WHO_AND_WHEN_RE.search(s) - if match: - datestr = match.group(3) - if self.date_parser is None: - # auto-detect the date format - if len(datestr.split(' ')) == 2: - format = 'raw' - elif datestr == 'now': - format = 'now' - else: - format = 'rfc2822' - self.date_parser = dates.DATE_PARSERS_BY_NAME[format] - when = self.date_parser(datestr) - return (match.group(1),match.group(2),when[0],when[1]) - else: - self.abort(errors.BadFormat, cmd, section, s) - - def _path(self, s): - """Parse a path.""" - if s.startswith('"'): - if s[-1] != '"': - self.abort(errors.BadFormat) - else: - s = _unquote_c_string(s[1:-1]) - # Check path for sanity - sp = s.split("/") - if "" in sp or ".." in sp: - self.abort(errors.BadFormat) - return s - - def _path_pair(self, s): - """Parse two paths separated by a space.""" - # TODO: handle a space in the first path - parts = s.split(' ', 1) - return map(_unquote_c_string, parts) - - def _mode(self, s): - """Parse a file mode into executable and symlink flags. - - :return (is_executable, is_symlink) - """ - # Note: Output from git-fast-export slightly different to spec - if s in ['644', '100644', '0100644']: - return False, False - elif s in ['755', '100755', '0100755']: - return True, False - elif s in ['120000', '0120000']: - return False, True - else: - self.abort(errors.BadFormat, 'filemodify', 'mode', s) - - -def _unquote_c_string(s): - """replace C-style escape sequences (\n, \", etc.) with real chars.""" - # HACK: Python strings are close enough - return s.decode('string_escape', 'replace') diff -r 31b9a5805f02 -r 08e2157aaa9a hgfastimport/processor.py --- a/hgfastimport/processor.py Tue May 05 10:27:27 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,163 +0,0 @@ -# Copyright (C) 2008 Canonical Ltd -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Processor of import commands. - -This module provides core processing functionality including an abstract class -for basing real processors on. See the processors package for examples. -""" - - -#from bzrlib.errors import NotBranchError -import errors - - -class ImportProcessor(object): - """Base class for import processors. - - Subclasses should override the pre_*, post_* and *_handler - methods as appropriate. - """ - - # XXX this is useless now that we process multiple input streams: - # we only want to call setup() and teardown() once for all of them! - def process(self, command_iter): - """Process the stream of commands. - - :param command_iter: an iterator providing commands - """ - raise RuntimeError("hey! who's calling this?!?") - self.setup() - try: - self._process(command_iter) - finally: - self.teardown() - - def _process(self, command_iter): - self.pre_process() - for cmd in command_iter(): - #print cmd.dump_str(verbose=True) - #print "starting" - try: - #print cmd.name - handler = self.__class__.__dict__[cmd.name + "_handler"] - except KeyError: - raise errors.MissingHandler(cmd.name) - else: - self.pre_handler(cmd) - handler(self, cmd) - self.post_handler(cmd) - if self.finished: - break - #print "around again" - self.post_process() - - def setup(self): - pass - - def teardown(self): - pass - - def pre_process(self): - """Hook for logic at start of processing.""" - pass - - def post_process(self): - """Hook for logic at end of processing.""" - pass - - def pre_handler(self, cmd): - """Hook for logic before each handler starts.""" - pass - - def post_handler(self, cmd): - """Hook for logic after each handler finishes.""" - pass - - def progress_handler(self, cmd): - """Process a ProgressCommand.""" - raise NotImplementedError(self.progress_handler) - - def blob_handler(self, cmd): - """Process a BlobCommand.""" - raise NotImplementedError(self.blob_handler) - - def checkpoint_handler(self, cmd): - """Process a CheckpointCommand.""" - raise NotImplementedError(self.checkpoint_handler) - - def commit_handler(self, cmd): - """Process a CommitCommand.""" - raise NotImplementedError(self.commit_handler) - - def reset_handler(self, cmd): - """Process a ResetCommand.""" - raise NotImplementedError(self.reset_handler) - - def tag_handler(self, cmd): - """Process a TagCommand.""" - raise NotImplementedError(self.tag_handler) - - -class CommitHandler(object): - """Base class for commit handling. - - Subclasses should override the pre_*, post_* and *_handler - methods as appropriate. - """ - - def __init__(self, command): - self.command = command - - def process(self): - self.pre_process_files() - for fc in self.command.file_iter(): - #print fc.dump_str(verbose=True) - try: - handler = self.__class__.__dict__[fc.name[4:] + "_handler"] - except KeyError: - raise errors.MissingHandler(fc.name) - else: - handler(self, fc) - self.post_process_files() - - def pre_process_files(self): - """Prepare for committing.""" - pass - - def post_process_files(self): - """Save the revision.""" - pass - - def modify_handler(self, filecmd): - """Handle a filemodify command.""" - raise NotImplementedError(self.modify_handler) - - def delete_handler(self, filecmd): - """Handle a filedelete command.""" - raise NotImplementedError(self.delete_handler) - - def copy_handler(self, filecmd): - """Handle a filecopy command.""" - raise NotImplementedError(self.copy_handler) - - def rename_handler(self, filecmd): - """Handle a filerename command.""" - raise NotImplementedError(self.rename_handler) - - def deleteall_handler(self, filecmd): - """Handle a filedeleteall command.""" - raise NotImplementedError(self.deleteall_handler)