annotate hgext3rd/fastimport/__init__.py @ 95:3b398a887b95

Use a sqlite3 database to store the blob data if available. This is much more performant than using a filesystem when we are dealing with a large number of blobs. If sqlite3 is not available, then fall back to writing to the filesystem. In both cases, the blob data is compressed before writing to save space. A new option has also been added to specify a path for persistent blob data. This is only really important for large continuous iterations where the source data has no concept of export marks and thus only gets bigger. What we gain here is a reduction in the write load on the disk.
author Roy Marples <roy@marples.name>
date Thu, 21 Jan 2021 23:59:21 +0000
parents dc1d11466aa6
children 7eb15a5c4cad
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
73
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
1 ''' import Git fast-import streams '''
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
2 from __future__ import absolute_import
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
3
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
4 from mercurial import (
74
a4f13dc5e3f7 Support Mercurial 5.6 and Python-3.6
Roy Marples <roy@marples.name>
parents: 73
diff changeset
5 commands,
73
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
6 encoding,
74
a4f13dc5e3f7 Support Mercurial 5.6 and Python-3.6
Roy Marples <roy@marples.name>
parents: 73
diff changeset
7 error,
a4f13dc5e3f7 Support Mercurial 5.6 and Python-3.6
Roy Marples <roy@marples.name>
parents: 73
diff changeset
8 pycompat,
73
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
9 )
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
10
50
b027552d517b Do sort options just like hgext.convert.
Greg Ward <greg-hg@gerg.ca>
parents: 47
diff changeset
11 from mercurial.i18n import _
73
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
12
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
13 from hgext.convert import (
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
14 convcmd,
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
15 hg,
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
16 )
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
17
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
18 from .hgimport import fastimport_source
0
d107c6d36780 Add the start of the hg fastimport command
Paul Crowley <paul@lshift.net>
parents:
diff changeset
19
80
dc1d11466aa6 Add __version__ and set to 0.1.0
Roy Marples <roy@marples.name>
parents: 79
diff changeset
20 __version__ = b'0.1.0'
74
a4f13dc5e3f7 Support Mercurial 5.6 and Python-3.6
Roy Marples <roy@marples.name>
parents: 73
diff changeset
21 testedwith = b'5.6.1'
a4f13dc5e3f7 Support Mercurial 5.6 and Python-3.6
Roy Marples <roy@marples.name>
parents: 73
diff changeset
22 minimumhgversion = b'4.1'
a4f13dc5e3f7 Support Mercurial 5.6 and Python-3.6
Roy Marples <roy@marples.name>
parents: 73
diff changeset
23
73
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
24 cmdtable = {}
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
25 try:
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
26 from mercurial import registrar
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
27 command = registrar.command(cmdtable)
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
28 except (ImportError, AttributeError):
74
a4f13dc5e3f7 Support Mercurial 5.6 and Python-3.6
Roy Marples <roy@marples.name>
parents: 73
diff changeset
29 from mercurial import cmdutil
73
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
30 command = cmdutil.command(cmdtable)
a99e5c6c8e1c Fix compatibility with 4.6+
Daniel Johnson <daniel@daniel-johnson.org>
parents: 72
diff changeset
31
74
a4f13dc5e3f7 Support Mercurial 5.6 and Python-3.6
Roy Marples <roy@marples.name>
parents: 73
diff changeset
@command(b'fastimport',
         [(b'', b'branchsort', None, _(b'try to sort changesets by branches')),
          (b'', b'datesort', None, _(b'try to sort changesets by date')),
          (b'', b'sourcesort', None, _(b'preserve source changesets order')),
          (b'', b'blobpath', b'', _(b'path for persistent blob data'))],
         _(b'hg fastimport SOURCE ...'),
         norepo=False)
def fastimport(ui, repo, *sources, **opts):
    '''Convert a git fastimport dump into Mercurial changesets.

    Reads a series of SOURCE fastimport dumps and adds the resulting
    changes to the current Mercurial repository.
    '''
    # NOTE: the docstring above is the command's user-facing help text, so
    # developer notes live in comments instead.
    #
    # Parameters:
    #   ui       - Mercurial ui object, handed to the sink, source and
    #              converter.
    #   repo     - the current repository; its root is the conversion sink.
    #   sources  - zero or more fastimport stream paths (bytes).
    #   opts     - command options: the three sort flags and 'blobpath'.
    #
    # Raises error.Abort when more than one sort mode is requested.

    # Would be nice to just call hgext.convert.convcmd.convert() and let
    # it take care of things.  But syntax and semantics are just a
    # little mismatched:
    #   - fastimport takes multiple source paths (mainly because cvs2git
    #     produces 2 dump files)
    #   - fastimport's dest is implicitly the current repo
    #
    # So for the time being, I have copied bits of convert() over here.
    # Boo, hiss.

    if not sources:
        # Must be a real one-element tuple: ``(b'-')`` is just the bytes
        # object b'-', and iterating bytes on Python 3 yields the integer
        # 45 rather than b'-'.
        # (b'-' presumably means "read from stdin"; that is decided inside
        # fastimport_source -- confirm there.)
        sources = (b'-',)

    # Keyword options arrive with str keys; convert them to bytes keys so
    # they can be looked up with the b'...' names used below.
    opts = pycompat.byteskwargs(opts)

    # assume fastimport metadata (usernames, commit messages) are
    # encoded UTF-8
    convcmd.orig_encoding = encoding.encoding
    encoding.encoding = b'UTF-8'

    # sink is the current repo, src is the list of fastimport streams
    destc = hg.mercurial_sink(ui, b'hg', repo.root)
    srcc = fastimport_source(ui, b'fastimport', repo, sources,
                             opts[b'blobpath'])

    defaultsort = b'branchsort'         # for efficiency and consistency
    sortmodes = (b'branchsort', b'datesort', b'sourcesort')
    requested = [mode for mode in sortmodes if opts.get(mode)]
    if len(requested) > 1:
        raise error.Abort(_(b'more than one sort mode specified'))
    sortmode = requested[0] if requested else defaultsort

    # not implemented: filemap, revmapfile
    revmapfile = destc.revmapfile()
    c = convcmd.converter(ui, srcc, destc, revmapfile, opts)
    c.convert(sortmode)