Mercurial > hg > hg-fastimport
comparison hgext3rd/fastimport/hgimport.py @ 79:59a9e4d0aa72
Move hgfastimport directory to hgext3rd/fastimport
This will allow hgrc to enable this extension like so:
[extensions]
fastimport =
| author | Roy Marples <roy@marples.name> |
|---|---|
| date | Mon, 18 Jan 2021 23:04:05 +0000 |
| parents | hgfastimport/hgimport.py@a4f13dc5e3f7 |
| children | e6602cc471d5 |
comparison
equal
deleted
inserted
replaced
| 74:a4f13dc5e3f7 | 79:59a9e4d0aa72 |
|---|---|
| 1 # Copyright (C) 2008 Canonical Ltd | |
| 2 # | |
| 3 # This program is free software; you can redistribute it and/or modify | |
| 4 # it under the terms of the GNU General Public License as published by | |
| 5 # the Free Software Foundation; either version 2 of the License, or | |
| 6 # (at your option) any later version. | |
| 7 # | |
| 8 # This program is distributed in the hope that it will be useful, | |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 11 # GNU General Public License for more details. | |
| 12 # | |
| 13 # You should have received a copy of the GNU General Public License | |
| 14 # along with this program; if not, write to the Free Software | |
| 15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 16 | |
| 17 """Processor of import commands. | |
| 18 | |
| 19 This module provides core processing functionality including an abstract class | |
| 20 for basing real processors on. See the processors package for examples. | |
| 21 """ | |
| 22 | |
| 23 import os | |
| 24 import shutil | |
| 25 import stat | |
| 26 import sys | |
| 27 | |
| 28 from hgext.convert import common, hg as converthg | |
| 29 from mercurial import util | |
| 30 from mercurial.i18n import _ | |
| 31 | |
| 32 from fastimport import processor, parser | |
| 33 | |
| 34 | |
class fastimport_source(common.converter_source):
    """Interface between the fastimport processor below and Mercurial's
    normal conversion infrastructure.

    The fast-import streams named in ``sources`` are parsed lazily, on the
    first call to getheads(); the other accessors only read the tables
    that HgImportProcessor filled in during that parse.
    """

    def __init__(self, ui, repotype, repo, sources):
        """Remember the inputs and create the processor.

        ui       -- Mercurial ui object, also handed to the processor
        repotype -- stored as-is in self.repotype
        repo     -- repository object handed to HgImportProcessor
        sources  -- iterable of stream names; b"-" selects standard input
        """
        self.ui = ui
        self.sources = sources
        self.processor = HgImportProcessor(ui, repo)
        self.parsed = False
        self.repotype = repotype

    # converter_source methods

    def before(self):
        """Let the processor prepare itself before the conversion."""
        self.processor.setup()

    def after(self):
        """Let the processor clean up after the conversion."""
        self.processor.teardown()

    def getheads(self):
        """Return a list of this repository's heads"""
        self._parse()
        allheads = []
        for branchheads in self.processor.branchmap.values():
            allheads.extend(branchheads)
        return allheads

    def getfile(self, name, fileid):
        """Return (data, mode) for a file, or (None, None) if fileid is
        None, which marks a deleted file.
        """
        if fileid is None:              # deleted file
            return None, None
        return (self.processor.getblob(fileid),
                self.processor.getmode(name, fileid))

    def getchanges(self, commitid, full):
        """Returns a tuple of (files, copies, cleanp2).

        files is the list of (filename, id) tuples recorded for commitid
        while the stream was processed.

        copies is a dictionary of dest: source

        cleanp2 is currently unused and an empty set is returned.

        Aborts if full is true: --full is not supported by this source.
        """
        if full:
            # Bytes literal, like every other message in this module.
            # NOTE(review): newer Mercurial exposes Abort as error.Abort;
            # confirm util.Abort exists in the targeted versions.
            raise util.Abort(
                _(b"convert from fastimport does not support --full"))
        return (self.processor.modified[commitid],
                self.processor.copies[commitid],
                set())

    def getcommit(self, commitid):
        """Return the commit object for commitid (None for None).

        Raises KeyError if commitid was never seen in the stream.
        """
        if commitid is None:
            return None
        return self.processor.commitmap[commitid]

    def gettags(self):
        """Return the tags as a dictionary of name: revision"""
        # oops, this loses order
        return dict(self.processor.tags)

    def getchangedfiles(self, rev, i):
        """Return the files changed by rev compared to parent[i].

        i is an index selecting one of the parents of rev.  The return
        value should be the list of files that are different in rev and
        this parent.

        If rev has no parents, i is None.

        This function is only needed to support --filemap; it is not
        implemented and always raises NotImplementedError.
        """
        raise NotImplementedError()

    # private worker methods

    def _parse(self):
        """Run every source stream through the processor, exactly once."""
        if self.parsed:
            return
        for source in self.sources:
            if source == b"-":
                # Named sources are opened in binary mode, so hand the
                # parser the binary layer of stdin as well when there is
                # one (Python 3); fall back to sys.stdin itself otherwise.
                infile = getattr(sys.stdin, "buffer", sys.stdin)
                owned = False
            else:
                infile = open(source, 'rb')
                owned = True
            try:
                p = parser.ImportParser(infile)
                self.processor.process(p.iter_commands)
            finally:
                # Only close what we opened; never close stdin.
                if owned:
                    infile.close()
        self.parsed = True
| 127 | |
| 128 | |
class HgImportProcessor(processor.ImportProcessor):
    """Fast-import command processor that records commits, branches, tags
    and file changes in memory and spools blob contents to files under
    the repository's .hg/blobs directory.
    """

    tagprefix = b"refs/tags/"

    def __init__(self, ui, repo):
        super(HgImportProcessor, self).__init__()
        self.ui = ui
        self.repo = repo

        self.commitmap = {}       # map commit ID (":1") to commit object
        self.branchmap = {}       # map branch name to list of heads

        # see HgImportCommitHandler for details on these three
        self.modified = {}        # map commit id to list of file mods
        self.filemodes = {}       # map commit id to {filename: mode} map
        self.copies = {}          # map commit id to dict of file copies

        self.tags = []            # list of (tag, mark) tuples

        self.numblobs = 0         # for progress reporting
        self.blobdir = None       # created lazily by writeblob()

    def setup(self):
        """Setup before processing any streams."""
        pass

    def teardown(self):
        """Cleanup after processing all streams: remove the blob dir."""
        if self.blobdir and os.path.exists(self.blobdir):
            self.ui.debug(b"Removing blob dir %s ...\n" % self.blobdir)
            shutil.rmtree(self.blobdir)

    def progress_handler(self, cmd):
        """Echo a 'progress' command's message to the user."""
        self.ui.write(b"Progress: %s\n" % cmd.message)

    def blob_handler(self, cmd):
        """Spool the data of a 'blob' command to disk under its id."""
        self.writeblob(cmd.id, cmd.data)

    def _getblobfilename(self, blobid):
        """Return the path of the spool file for blobid.

        Raises RuntimeError if no blob has been written yet (there is no
        blob directory in that case).
        """
        if self.blobdir is None:
            raise RuntimeError("no blobs seen, so no blob directory created")
        # XXX should escape ":" for windows
        return os.path.join(self.blobdir, b"blob-" + blobid)

    def getblob(self, fileid):
        """Return the contents of the blob named by fileid, a
        (commitid, blobid) pair; only the blobid part is used here.
        """
        (commitid, blobid) = fileid
        with open(self._getblobfilename(blobid), "rb") as f:
            return f.read()

    def writeblob(self, blobid, data):
        """Write data to the spool file for blobid, creating the blob
        directory on first use, and report progress every 500 blobs.
        """
        if self.blobdir is None:        # no blobs seen yet
            self.blobdir = os.path.join(self.repo.root, b".hg", b"blobs")
            # A run that died before teardown() leaves the directory
            # behind; reuse it instead of failing on mkdir.
            if not os.path.isdir(self.blobdir):
                os.mkdir(self.blobdir)

        fn = self._getblobfilename(blobid)
        # 'with' closes the file even if the write fails.
        with open(fn, "wb") as blobfile:
            blobfile.write(data)

        self.numblobs += 1
        if self.numblobs % 500 == 0:
            self.ui.status(b"%d blobs read\n" % self.numblobs)

    def getmode(self, name, fileid):
        """Return the mode recorded for file 'name' in the commit part of
        fileid, a (commitid, blobid) pair.
        """
        (commitid, blobid) = fileid
        return self.filemodes[commitid][name]

    def checkpoint_handler(self, cmd):
        # This command means nothing to us
        pass

    def _getcommit(self, commitref):
        """Given a mark reference or a branch name, return the
        appropriate commit object.  Return None if commitref is a tag
        or a branch with no commits.  Raises ValueError if commitref is
        neither a mark, a tag nor a head ref, and KeyError if anything
        else is out of whack.
        """
        if commitref.startswith(b":"):
            # KeyError here indicates the input stream is broken.
            return self.commitmap[commitref]
        if commitref.startswith(self.tagprefix):
            return None

        branch = self._getbranch(commitref)
        if branch is None:
            raise ValueError(b"invalid commit ref: %s" % commitref)

        heads = self.branchmap.get(branch)
        if heads is None:
            return None
        # KeyError here indicates bad commit id in self.branchmap.
        return self.commitmap[heads[-1]]

    def _getbranch(self, ref):
        """Translate a Git head ref to corresponding Mercurial branch
        name.  E.g. "refs/heads/foo" is translated to "foo".
        Special case: "refs/heads/master" becomes "default".  If
        'ref' is not a head ref, return None.
        """
        prefix = b"refs/heads/"
        if not ref.startswith(prefix):
            return None
        branch = ref[len(prefix):]
        if branch == b"master":
            return b"default"
        return branch

    def commit_handler(self, cmd):
        """Record one 'commit' command.

        Works out the parents and the Mercurial branch, lets an
        HgImportCommitHandler collect the file changes, and stores the
        resulting commit object together with updated branch heads.

        Raises NotImplementedError for commits with more than one
        'merge' parent (i.e. more than two parents in total).
        """
        # XXX this assumes the fixup branch name used by cvs2git.  In
        # contrast, git-fast-import(1) recommends "TAG_FIXUP" (not under
        # refs/heads), and implies that it can be called whatever the
        # creator of the fastimport dump wants to call it.  So the name
        # of the fixup branch should be configurable!
        fixup = (cmd.ref == b"refs/heads/TAG.FIXUP")

        if cmd.ref.startswith(self.tagprefix) and cmd.mark:
            tag = cmd.ref[len(self.tagprefix):]
            self.tags.append((tag, b':' + cmd.mark))

        if cmd.from_:
            first_parent = cmd.from_
        else:
            first_parent = self._getcommit(cmd.ref)  # commit object
            if first_parent is not None:
                first_parent = first_parent.rev      # commit id

        if cmd.merges:
            if len(cmd.merges) > 1:
                raise NotImplementedError("Can't handle more than two parents")
            second_parent = cmd.merges[0]
        else:
            second_parent = None

        if first_parent is None and second_parent is not None:
            # First commit on a new branch that has 'merge' but no 'from':
            # special case meaning branch starts with no files; the contents
            # of the first commit (this one) determine the list of files at
            # branch time.
            # XXX the "starts with no files" part is not implemented: the
            # merge parent is simply promoted to first parent.
            first_parent = second_parent
            second_parent = None

        bfirst_parent = first_parent or b''
        bsecond_parent = second_parent or b''
        self.ui.debug(b"commit %s: first_parent = %s, second_parent = %s\n"
                      % (cmd, bfirst_parent, bsecond_parent))
        assert ((first_parent != second_parent) or
                (first_parent is second_parent is None)), \
               (b"commit %s: first_parent == second parent = %s"
                % (cmd, bfirst_parent))

        # Figure out the Mercurial branch name.
        if fixup and first_parent is not None:
            # If this is a fixup commit, pretend it happened on the same
            # branch as its first parent.  (We don't want a Mercurial
            # named branch called "TAG.FIXUP" in the output repository.)
            branch = self.commitmap[first_parent].branch
        else:
            branch = self._getbranch(cmd.ref)

        commit_handler = HgImportCommitHandler(
            self, cmd, self.ui)
        commit_handler.process()
        self.modified[cmd.id] = commit_handler.modified
        self.filemodes[cmd.id] = commit_handler.mode
        self.copies[cmd.id] = commit_handler.copies

        # in case we are converting from git or bzr, prefer author but
        # fallback to committer (committer is required, author is
        # optional)
        userinfo = cmd.author or cmd.committer
        if userinfo[0] == userinfo[1]:
            # In order to conform to fastimport syntax, cvs2git with no
            # authormap produces author names like "jsmith <jsmith>"; if
            # we see that, revert to plain old "jsmith".
            user = userinfo[0]
        else:
            user = b"%s <%s>" % (userinfo[0], userinfo[1])

        text = cmd.message
        date = self.convert_date(userinfo)
        parents = []
        if first_parent:
            parents.append(first_parent)
        if second_parent:
            parents.append(second_parent)

        # The sort key is the numeric part of the id (":12" -> 12).
        commit = common.commit(user, date, text, parents, branch,
                               rev=cmd.id, sortkey=int(cmd.id[1:]))

        self.commitmap[cmd.id] = commit
        heads = self.branchmap.get(branch)
        if heads is None:
            heads = [cmd.id]
        else:
            # adding to an existing branch: replace the previous head
            try:
                heads.remove(first_parent)
            except ValueError:          # first parent not a head: no problem
                pass
            heads.append(cmd.id)        # at end means this is tipmost
        self.branchmap[branch] = heads
        self.ui.debug(b"processed commit %s\n" % cmd)

    def convert_date(self, c):
        """Return b"<c[2]> <-c[3]>" for an author/committer record c.

        NOTE(review): presumably c[2] is the timestamp and c[3] the
        timezone offset, negated to match Mercurial's sign convention --
        confirm against the fastimport parser.
        """
        return b"%d %d" % (int(c[2]), -int(c[3]))

    def reset_handler(self, cmd):
        """Handle a 'reset' command for a branch or a lightweight tag.

        Any other kind of ref is ignored.
        """
        branch = self._getbranch(cmd.ref)
        if branch:
            # The usual case for 'reset': (re)create the named branch.
            if cmd.from_ is not None:
                self.branchmap[branch] = [cmd.from_]
            else:
                # pretend the branch never existed... is this right?!?
                self.branchmap.pop(branch, None)
        elif cmd.ref.startswith(self.tagprefix):
            # Create a "lightweight tag" in Git terms.  As I understand
            # it, that's a tag with no description and no history --
            # rather like CVS tags.  cvs2git turns CVS tags into Git
            # lightweight tags, so we should make sure they become
            # Mercurial tags.  But we don't have to fake a history for
            # them; save them up for the end.
            if cmd.from_ is not None:
                tag = cmd.ref[len(self.tagprefix):]
                self.tags.append((tag, cmd.from_))

    def tag_handler(self, cmd):
        """Ignore 'tag' commands."""
        pass
| 379 | |
| 380 | |
class HgImportCommitHandler(processor.CommitHandler):
    """Collects the file-level changes of a single commit command.

    After processing, the owning HgImportProcessor reads the results
    from the 'modified', 'mode' and 'copies' attributes.
    """

    def __init__(self, parent, command, ui):
        self.parent = parent            # HgImportProcessor running the show
        self.command = command          # CommitCommand that we're processing
        self.ui = ui

        # Files changes by this commit as a list of (filename, id)
        # tuples where id is (commitid, blobid).  The blobid is
        # needed to fetch the file's contents later, and the commitid
        # is needed to fetch the mode.  Deleted files are listed with
        # an id of None.
        self.modified = []

        # mode of files listed in self.modified: b'', b'x', or b'l'
        self.mode = {}

        # dictionary of dest: source (the old name of a renamed file is
        # also listed in self.modified, as a deletion)
        self.copies = {}

        # number of inline files seen in this commit
        self.inlinecount = 0

    def modify_handler(self, filecmd):
        """Record a modified file and its mode.

        Inline file contents are spooled through the parent processor
        under a synthetic "<commit id>-inline:<n>" blob id.

        Raises RuntimeError for a mode that is not a symlink, not
        executable and not a regular file.
        """
        if filecmd.dataref:
            blobid = filecmd.dataref    # blobid is the mark of the blob
        else:
            blobid = b"%s-inline:%d" % (self.command.id, self.inlinecount)
            assert filecmd.data is not None
            self.parent.writeblob(blobid, filecmd.data)
            self.inlinecount += 1

        fileid = (self.command.id, blobid)

        self.modified.append((filecmd.path, fileid))
        if stat.S_ISLNK(filecmd.mode):      # link
            mode = b'l'
        elif filecmd.mode & 0o111:          # executable
            mode = b'x'
        elif stat.S_ISREG(filecmd.mode):    # regular file
            mode = b''
        else:
            # The mode is an int: bytes "%s" cannot format it on
            # Python 3, so print it in octal.
            raise RuntimeError(b"mode %o unsupported" % filecmd.mode)

        self.mode[filecmd.path] = mode

    def delete_handler(self, filecmd):
        """Record a deleted file (id None)."""
        self.modified.append((filecmd.path, None))

    def copy_handler(self, filecmd):
        """Record a file copy as dest: source, the same orientation that
        rename_handler uses.
        """
        self.copies[filecmd.dest_path] = filecmd.src_path

    def rename_handler(self, filecmd):
        """Record a rename: a copy old -> new plus a deletion of old."""
        # copy oldname to newname and delete oldname
        self.copies[filecmd.new_path] = filecmd.old_path
        self.modified.append((filecmd.old_path, None))
