Mercurial > hg > hg-fastimport
changeset 37:513449a88de2
Handle non-ASCII input correctly (assuming UTF-8 encoding).
- fastimport library now returns filenames as byte strings,
so leave them be
- re-encode commit message as UTF-8
- monkeypatch mercurial.encoding to assume UTF-8 for everything
| author | Greg Ward <greg-hg@gerg.ca> |
|---|---|
| date | Fri, 08 May 2009 11:03:16 -0400 |
| parents | 0e4e40caea58 |
| children | 3048a2dcf68a |
| files | hgfastimport/hgimport.py |
| diffstat | 1 files changed, 10 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/hgfastimport/hgimport.py	Tue May 05 21:04:06 2009 -0400
+++ b/hgfastimport/hgimport.py	Fri May 08 11:03:16 2009 -0400
@@ -138,11 +138,18 @@
         userinfo = cmd.author or cmd.committer
         user = "%s <%s>" % (userinfo[0], userinfo[1])
 
-        # XXX is this the right way to specify filename encoding?!?
-        files = [f.encode("utf-8") for f in commit_handler.filelist()]
+        # Blech: have to monkeypatch mercurial.encoding to ensure that
+        # everything under rawcommit() assumes the same encoding,
+        # regardless of current locale.
+        from mercurial import encoding
+        encoding.encoding = "UTF-8"
+
+        files = commit_handler.filelist()
+        assert type(cmd.message) is unicode
+        text = cmd.message.encode("utf-8") # XXX cmd.message is unicode
         date = self.convert_date(userinfo)
         node = self.repo.rawcommit(
-            files=files, text=cmd.message, user=user, date=date)
+            files=files, text=text, user=user, date=date)
         rev = self.repo.changelog.rev(node)
         if cmd.mark is not None:
             self.mark_map[":" + cmd.mark] = rev
```
