# HG changeset patch
# User Greg Ward
# Date 1241794996 14400
# Node ID 513449a88de2c11d7b13dfbfbd3066eca1a8a7cb
# Parent 0e4e40caea58d2a8b67cf147050e4e763a698ac5
Handle non-ASCII input correctly (assuming UTF-8 encoding).

- fastimport library now returns filenames as byte strings, so leave them be
- re-encode commit message as UTF-8
- monkeypatch mercurial.encoding to assume UTF-8 for everything

diff -r 0e4e40caea58 -r 513449a88de2 hgfastimport/hgimport.py
--- a/hgfastimport/hgimport.py Tue May 05 21:04:06 2009 -0400
+++ b/hgfastimport/hgimport.py Fri May 08 11:03:16 2009 -0400
@@ -138,11 +138,18 @@
         userinfo = cmd.author or cmd.committer
         user = "%s <%s>" % (userinfo[0], userinfo[1])
 
-        # XXX is this the right way to specify filename encoding?!?
-        files = [f.encode("utf-8") for f in commit_handler.filelist()]
+        # Blech: have to monkeypatch mercurial.encoding to ensure that
+        # everything under rawcommit() assumes the same encoding,
+        # regardless of current locale.
+        from mercurial import encoding
+        encoding.encoding = "UTF-8"
+
+        files = commit_handler.filelist()
+        assert type(cmd.message) is unicode
+        text = cmd.message.encode("utf-8") # XXX cmd.message is unicode
         date = self.convert_date(userinfo)
         node = self.repo.rawcommit(
-            files=files, text=cmd.message, user=user, date=date)
+            files=files, text=text, user=user, date=date)
         rev = self.repo.changelog.rev(node)
         if cmd.mark is not None:
             self.mark_map[":" + cmd.mark] = rev