comparison hgext3rd/fastimport/hgimport.py @ 79:59a9e4d0aa72

Move hgfastimport directory to hgext3rd/fastimport. This will allow hgrc to enable this extension like so: [extensions] fastimport =
author Roy Marples <roy@marples.name>
date Mon, 18 Jan 2021 23:04:05 +0000
parents hgfastimport/hgimport.py@a4f13dc5e3f7
children e6602cc471d5
comparison
equal deleted inserted replaced
74:a4f13dc5e3f7 79:59a9e4d0aa72
1 # Copyright (C) 2008 Canonical Ltd
2 #
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16
17 """Processor of import commands.
18
19 This module provides core processing functionality including an abstract class
20 for basing real processors on. See the processors package for examples.
21 """
22
23 import os
24 import shutil
25 import stat
26 import sys
27
28 from hgext.convert import common, hg as converthg
29 from mercurial import util
30 from mercurial.i18n import _
31
32 from fastimport import processor, parser
33
34
class fastimport_source(common.converter_source):
    """Interface between the fastimport processor below and Mercurial's
    normal conversion infrastructure.

    The fast-import streams named in ``sources`` are parsed lazily, the
    first time getheads() is called.  The other query methods read what
    the HgImportProcessor recorded during that parse.
    """

    def __init__(self, ui, repotype, repo, sources):
        """Remember the inputs and create the processor; nothing is parsed
        yet.

        sources is an iterable of stream names; the name b"-" selects
        standard input, anything else is opened as a file.
        """
        self.ui = ui
        self.sources = sources
        self.processor = HgImportProcessor(ui, repo)
        self.parsed = False
        self.repotype = repotype

    # converter_source methods

    def before(self):
        """Let the processor prepare itself before any stream is read."""
        self.processor.setup()

    def after(self):
        """Let the processor clean up once all streams have been handled."""
        self.processor.teardown()

    def getheads(self):
        """Return a list of this repository's heads"""
        self._parse()
        allheads = []
        for branchheads in self.processor.branchmap.values():
            allheads.extend(branchheads)
        return allheads

    def getfile(self, name, fileid):
        """Return (data, mode) for file name at fileid, or (None, None)
        when fileid is None, which marks a deleted file.
        """
        if fileid is None:              # deleted file
            return None, None
        return (self.processor.getblob(fileid),
                self.processor.getmode(name, fileid))

    def getchanges(self, commitid, full):
        """Returns a tuple of (files, copies, cleanp2).

        files is a sorted list of (filename, id) tuples for all files
        changed between commitid and its first parent returned by
        getcommit().
        commitid id is the source revision id of the file.
        cleanp2 is currently unused and an empty set is returned.

        copies is a dictionary of dest: source
        """
        if full:
            # NOTE(review): confirm util.Abort is still provided by the
            # Mercurial versions this extension targets.
            raise util.Abort(_("convert from fastimport does not support --full"))
        return (self.processor.modified[commitid],
                self.processor.copies[commitid],
                set())

    def getcommit(self, commitid):
        """Return the commit object for commitid"""
        if commitid is None:
            return None
        else:
            return self.processor.commitmap[commitid]

    def gettags(self):
        """Return the tags as a dictionary of name: revision"""
        # oops, this loses order
        return dict(self.processor.tags)

    def getchangedfiles(self, rev, i):
        """Return the files changed by rev compared to parent[i].

        i is an index selecting one of the parents of rev.  The return
        value should be the list of files that are different in rev and
        this parent.

        If rev has no parents, i is None.

        This function is only needed to support --filemap
        """
        raise NotImplementedError()

    # private worker methods

    def _parse(self):
        """Run every source stream through the processor, at most once."""
        if self.parsed:
            return
        for source in self.sources:
            if source == b"-":
                # Regular sources are opened in binary mode, so standard
                # input must yield bytes as well.  On Python 3 that means
                # the underlying binary buffer; where sys.stdin has no
                # 'buffer' attribute it is used as is.  Standard input is
                # deliberately never closed.
                self._process(getattr(sys.stdin, "buffer", sys.stdin))
            else:
                with open(source, 'rb') as infile:
                    self._process(infile)
        self.parsed = True

    def _process(self, infile):
        """Parse one open binary stream and feed its commands to the
        processor."""
        p = parser.ImportParser(infile)
        self.processor.process(p.iter_commands)
127
128
class HgImportProcessor(processor.ImportProcessor):
    """Fast-import command processor that records what the converter
    source needs: commits, branch heads, tags, per-commit file changes,
    and blob contents (written to files in a 'blobs' directory under the
    repository's .hg directory).
    """

    tagprefix = b"refs/tags/"

    def __init__(self, ui, repo):
        super(HgImportProcessor, self).__init__()
        self.ui = ui
        self.repo = repo

        self.commitmap = {}             # map commit ID (":1") to commit object
        self.branchmap = {}             # map branch name to list of heads

        # see HgImportCommitHandler for details on these three
        self.modified = {}              # map commit id to list of file mods
        self.filemodes = {}             # map commit id to {filename: mode} map
        self.copies = {}                # map commit id to dict of file copies

        self.tags = []                  # list of (tag, mark) tuples

        self.numblobs = 0               # for progress reporting
        self.blobdir = None             # created lazily by writeblob()

    def setup(self):
        """Setup before processing any streams."""
        pass

    def teardown(self):
        """Cleanup after processing all streams."""
        if self.blobdir and os.path.exists(self.blobdir):
            self.ui.debug(b"Removing blob dir %s ...\n" % self.blobdir)
            shutil.rmtree(self.blobdir)

    def progress_handler(self, cmd):
        """Echo the message of a 'progress' command to the user."""
        self.ui.write(b"Progress: %s\n" % cmd.message)

    def blob_handler(self, cmd):
        """Save the data of a 'blob' command under its id."""
        self.writeblob(cmd.id, cmd.data)

    def _getblobfilename(self, blobid):
        """Return the path of the file holding blob blobid.

        Raises RuntimeError if no blob has been written yet, because the
        blob directory only exists after the first writeblob() call.
        """
        if self.blobdir is None:
            raise RuntimeError("no blobs seen, so no blob directory created")
        # XXX should escape ":" for windows
        return os.path.join(self.blobdir, b"blob-" + blobid)

    def getblob(self, fileid):
        """Return the contents of the blob named by fileid, a
        (commitid, blobid) pair."""
        (commitid, blobid) = fileid
        with open(self._getblobfilename(blobid), "rb") as f:
            return f.read()

    def writeblob(self, blobid, data):
        """Store data as the contents of blob blobid, creating the blob
        directory on first use, and report progress every 500 blobs."""
        if self.blobdir is None:        # no blobs seen yet
            self.blobdir = os.path.join(self.repo.root, b".hg", b"blobs")
            # Tolerate a directory left behind by an earlier run that
            # never reached teardown().
            if not os.path.isdir(self.blobdir):
                os.mkdir(self.blobdir)

        fn = self._getblobfilename(blobid)
        # 'with' guarantees the file is closed even if the write fails.
        with open(fn, "wb") as blobfile:
            blobfile.write(data)

        self.numblobs += 1
        if self.numblobs % 500 == 0:
            self.ui.status(b"%d blobs read\n" % self.numblobs)

    def getmode(self, name, fileid):
        """Return the mode recorded for file name in the commit that
        fileid, a (commitid, blobid) pair, belongs to."""
        (commitid, blobid) = fileid
        return self.filemodes[commitid][name]

    def checkpoint_handler(self, cmd):
        # This command means nothing to us
        pass

    def _getcommit(self, commitref):
        """Given a mark reference or a branch name, return the
        appropriate commit object.  Return None if commitref is a tag
        or a branch with no commits.  Raises KeyError if anything else
        is out of whack, and ValueError if commitref is neither a mark,
        a tag ref, nor a head ref.
        """
        if commitref.startswith(b":"):
            # KeyError here indicates the input stream is broken.
            return self.commitmap[commitref]
        elif commitref.startswith(self.tagprefix):
            return None
        else:
            branch = self._getbranch(commitref)
            if branch is None:
                raise ValueError(b"invalid commit ref: %s" % commitref)

            heads = self.branchmap.get(branch)
            if heads is None:
                return None
            else:
                # KeyError here indicates bad commit id in self.branchmap.
                return self.commitmap[heads[-1]]

    def _getbranch(self, ref):
        """Translate a Git head ref to corresponding Mercurial branch
        name.  E.g. \"refs/heads/foo\" is translated to \"foo\".
        Special case: \"refs/heads/master\" becomes \"default\".  If
        'ref' is not a head ref, return None.
        """
        prefix = b"refs/heads/"
        if ref.startswith(prefix):
            branch = ref[len(prefix):]
            if branch == b"master":
                return b"default"
            else:
                return branch
        else:
            return None

    def commit_handler(self, cmd):
        """Record one 'commit' command.

        Works out the parents and the Mercurial branch, runs the file
        commands through an HgImportCommitHandler, then stores the
        resulting commit object, file changes, modes and copies under
        cmd.id and updates the branch heads.

        Raises NotImplementedError for commits with more than two parents.
        """
        # XXX this assumes the fixup branch name used by cvs2git.  In
        # contrast, git-fast-import(1) recommends "TAG_FIXUP" (not under
        # refs/heads), and implies that it can be called whatever the
        # creator of the fastimport dump wants to call it.  So the name
        # of the fixup branch should be configurable!
        fixup = (cmd.ref == b"refs/heads/TAG.FIXUP")

        if cmd.ref.startswith(self.tagprefix) and cmd.mark:
            tag = cmd.ref[len(self.tagprefix):]
            self.tags.append((tag, b':' + cmd.mark))

        if cmd.from_:
            first_parent = cmd.from_
        else:
            first_parent = self._getcommit(cmd.ref) # commit object
            if first_parent is not None:
                first_parent = first_parent.rev     # commit id

        if cmd.merges:
            if len(cmd.merges) > 1:
                raise NotImplementedError("Can't handle more than two parents")
            second_parent = cmd.merges[0]
        else:
            second_parent = None

        if first_parent is None and second_parent is not None:
            # First commit on a new branch that has 'merge' but no 'from':
            # special case meaning branch starts with no files; the contents of
            # the first commit (this one) determine the list of files at branch
            # time.
            # XXX the "starts with no files" part is not implemented; the
            # merge parent is simply promoted to first parent.
            first_parent = second_parent
            second_parent = None

        # Bytes stand-ins so that a missing parent can still be formatted.
        bfirst_parent = first_parent or b''
        bsecond_parent = second_parent or b''
        self.ui.debug(b"commit %s: first_parent = %s, second_parent = %s\n"
                      % (cmd, bfirst_parent, bsecond_parent))
        assert ((first_parent != second_parent) or
                (first_parent is second_parent is None)), \
               (b"commit %s: first_parent == second parent = %s"
                % (cmd, bfirst_parent))

        # Figure out the Mercurial branch name.
        if fixup and first_parent is not None:
            # If this is a fixup commit, pretend it happened on the same
            # branch as its first parent.  (We don't want a Mercurial
            # named branch called "TAG.FIXUP" in the output repository.)
            branch = self.commitmap[first_parent].branch
        else:
            branch = self._getbranch(cmd.ref)

        commit_handler = HgImportCommitHandler(
            self, cmd, self.ui)
        commit_handler.process()
        self.modified[cmd.id] = commit_handler.modified
        self.filemodes[cmd.id] = commit_handler.mode
        self.copies[cmd.id] = commit_handler.copies

        # in case we are converting from git or bzr, prefer author but
        # fallback to committer (committer is required, author is
        # optional)
        userinfo = cmd.author or cmd.committer
        if userinfo[0] == userinfo[1]:
            # In order to conform to fastimport syntax, cvs2git with no
            # authormap produces author names like "jsmith <jsmith>"; if
            # we see that, revert to plain old "jsmith".
            user = userinfo[0]
        else:
            user = b"%s <%s>" % (userinfo[0], userinfo[1])

        text = cmd.message
        date = self.convert_date(userinfo)
        parents = []
        if first_parent:
            parents.append(first_parent)
        if second_parent:
            parents.append(second_parent)

        commit = common.commit(user, date, text, parents, branch,
                               rev=cmd.id, sortkey=int(cmd.id[1:]))

        self.commitmap[cmd.id] = commit
        heads = self.branchmap.get(branch)
        if heads is None:
            heads = [cmd.id]
        else:
            # adding to an existing branch: replace the previous head
            try:
                heads.remove(first_parent)
            except ValueError:          # first parent not a head: no problem
                pass
            heads.append(cmd.id)        # at end means this is tipmost
        self.branchmap[branch] = heads
        self.ui.debug(b"processed commit %s\n" % cmd)

    def convert_date(self, c):
        """Return the date of user-info sequence c as bytes of the form
        b"<c[2]> <-c[3]>".

        c[2] and c[3] are converted with int(); the sign of c[3] is
        flipped (presumably because Mercurial counts the UTC offset in
        the opposite direction from the stream -- TODO confirm).
        """
        return b"%d %d" % (int(c[2]), -int(c[3]))

    def reset_handler(self, cmd):
        """Record a 'reset' command: re-point or forget a branch, or
        remember a lightweight tag.  Other refs are ignored."""
        branch = self._getbranch(cmd.ref)
        if branch:
            # The usual case for 'reset': (re)create the named branch.
            # XXX what should we do if cmd.from_ is None?
            if cmd.from_ is not None:
                self.branchmap[branch] = [cmd.from_]
            else:
                # pretend the branch never existed... is this right?!?
                self.branchmap.pop(branch, None)
        elif cmd.ref.startswith(self.tagprefix):
            # Create a "lightweight tag" in Git terms.  As I understand
            # it, that's a tag with no description and no history --
            # rather like CVS tags.  cvs2git turns CVS tags into Git
            # lightweight tags, so we should make sure they become
            # Mercurial tags.  But we don't have to fake a history for
            # them; save them up for the end.
            if cmd.from_ is not None:
                tag = cmd.ref[len(self.tagprefix):]
                self.tags.append((tag, cmd.from_))

    def tag_handler(self, cmd):
        """Ignore the command: nothing is recorded for it."""
        pass
379
380
class HgImportCommitHandler(processor.CommitHandler):
    """Collects the file-level effects (modifications, deletions, copies,
    renames and file modes) of a single commit command on behalf of an
    HgImportProcessor.
    """

    def __init__(self, parent, command, ui):
        self.parent = parent            # HgImportProcessor running the show
        self.command = command          # CommitCommand that we're processing
        self.ui = ui

        # Files changes by this commit as a list of (filename, id)
        # tuples where id is (commitid, blobid).  The blobid is
        # needed to fetch the file's contents later, and the commitid
        # is needed to fetch the mode.  Deleted files are listed with
        # None in place of the id.
        # (XXX what about inline file contents?)
        self.modified = []

        # mode of files listed in self.modified: '', 'x', or 'l'
        self.mode = {}

        # dictionary of dest: src (the old name of a renamed file is also
        # listed in self.modified, as a deletion)
        self.copies = {}

        # number of inline files seen in this commit
        self.inlinecount = 0

    def modify_handler(self, filecmd):
        """Record a file modification and the file's mode.

        Inline file data is first written out as a blob with a generated
        id.  Raises RuntimeError if the file mode is not a symlink, an
        executable, or a regular file.
        """
        if filecmd.dataref:
            blobid = filecmd.dataref    # blobid is the mark of the blob
        else:
            blobid = b"%s-inline:%d" % (self.command.id, self.inlinecount)
            assert filecmd.data is not None
            self.parent.writeblob(blobid, filecmd.data)
            self.inlinecount += 1

        fileid = (self.command.id, blobid)

        self.modified.append((filecmd.path, fileid))
        if stat.S_ISLNK(filecmd.mode): # link
            mode = b'l'
        elif filecmd.mode & 0o111:      # executable
            mode = b'x'
        elif stat.S_ISREG(filecmd.mode): # regular file
            mode = b''
        else:
            # filecmd.mode is an integer (it is tested with stat.S_IS*() and
            # '&' above); bytes %-formatting with %s rejects integers on
            # Python 3, so build the message as a native string.
            raise RuntimeError("mode %s unsupported" % filecmd.mode)

        self.mode[filecmd.path] = mode

    def delete_handler(self, filecmd):
        """Record a file deletion (a None id marks the file as deleted)."""
        self.modified.append((filecmd.path, None))

    def copy_handler(self, filecmd):
        """Record a file copy, keyed by destination like rename_handler."""
        self.copies[filecmd.dest_path] = filecmd.src_path

    def rename_handler(self, filecmd):
        """Record a rename as a copy plus a deletion of the old name."""
        # copy oldname to newname and delete oldname
        self.copies[filecmd.new_path] = filecmd.old_path
        self.modified.append((filecmd.old_path, None))