view hgext3rd/fastimport/__init__.py @ 95:3b398a887b95

Use a sqlite3 database to store the blob data if available. This is much more performant than using a filesystem when we are dealing with a large number of blobs. If sqlite3 is not available, then fall back to writing to the filesystem. In both cases, the blob data is compressed before writing to save space. A new option has also been added to specify a path for persistent blob data. This is only really important for large continuous iterations where the source data has no concept of export marks and thus only gets bigger. What we gain here is a reduction in the write load on the disk.
author Roy Marples <roy@marples.name>
date Thu, 21 Jan 2021 23:59:21 +0000
parents dc1d11466aa6
children 7eb15a5c4cad
line wrap: on
line source

''' import Git fast-import streams '''
from __future__ import absolute_import

from mercurial import (
    commands,
    encoding,
    error,
    pycompat,
)

from mercurial.i18n import _

from hgext.convert import (
    convcmd,
    hg,
)

from .hgimport import fastimport_source

# Extension metadata, stored as bytes.
# NOTE(review): `testedwith` and `minimumhgversion` look like the attributes
# Mercurial's extension loader inspects (tested versions / minimum supported
# version) -- confirm against the Mercurial extension-writing documentation.
__version__ = b'0.1.0'
testedwith = b'5.6.1'
minimumhgversion = b'4.1'

# Table of commands provided by this module; it is handed to the `command`
# decorator factory below, which is then used to register `fastimport`.
cmdtable = {}
try:
    # Preferred location of the command decorator factory.
    from mercurial import registrar
    command = registrar.command(cmdtable)
except (ImportError, AttributeError):
    # Fallback when `registrar` cannot be imported or has no `command`
    # attribute (presumably older Mercurial releases -- verify).
    from mercurial import cmdutil
    command = cmdutil.command(cmdtable)

@command(b'fastimport',
         [(b'', b'branchsort', None, _(b'try to sort changesets by branches')),
          (b'', b'datesort', None, _(b'try to sort changesets by date')),
          (b'', b'sourcesort', None, _(b'preserve source changesets order')),
          (b'', b'blobpath', b'', _(b'path for persistent blob data'))],
         _(b'hg fastimport SOURCE ...'),
         norepo=False)
def fastimport(ui, repo, *sources, **opts):
    '''Convert a git fastimport dump into Mercurial changesets.

    Reads a series of SOURCE fastimport dumps and adds the resulting
    changes to the current Mercurial repository.
    '''
    # Would be nice to just call hgext.convert.convcmd.convert() and let
    # it take care of things.  But syntax and semantics are just a
    # little mismatched:
    #   - fastimport takes multiple source paths (mainly because cvs2git
    #     produces 2 dump files)
    #   - fastimport's dest is implicitly the current repo
    #
    # So for the time being, I have copied bits of convert() over here.
    # Boo, hiss.

    if not sources:
        # No SOURCE given: fall back to the single source b'-'
        # (presumably stdin; interpreted by fastimport_source).  The
        # trailing comma matters: (b'-') is just the bytes object b'-',
        # and iterating that on Python 3 yields the integer 45 instead
        # of a path.
        sources = (b'-',)

    # Option names arrive as str keyword arguments; the rest of this
    # function (and the convert machinery) looks them up by bytes keys.
    opts = pycompat.byteskwargs(opts)

    # assume fastimport metadata (usernames, commit messages) are
    # encoded UTF-8.  The previous encoding is stashed on convcmd; it is
    # not restored by this function.
    convcmd.orig_encoding = encoding.encoding
    encoding.encoding = b'UTF-8'

    # sink is the current repo, src is the list of fastimport streams
    destc = hg.mercurial_sink(ui, b'hg', repo.root)
    srcc = fastimport_source(ui, b'fastimport', repo, sources,
                             opts[b'blobpath'])

    # At most one sort mode may be requested; branchsort is the default
    # for efficiency and consistency.
    defaultsort = b'branchsort'
    sortmodes = (b'branchsort', b'datesort', b'sourcesort')
    requested = [m for m in sortmodes if opts.get(m)]
    if len(requested) > 1:
        raise error.Abort(_(b'more than one sort mode specified'))
    sortmode = requested[0] if requested else defaultsort

    # not implemented: filemap, revmapfile
    revmapfile = destc.revmapfile()
    c = convcmd.converter(ui, srcc, destc, revmapfile, opts)
    c.convert(sortmode)