Mercurial > hg > hg-fastimport
annotate hgext3rd/fastimport/vendor/python_fastimport/processors/filter_processor.py @ 88:2fc99e3479d9
python-fastimport: Import our own modules using relative pathing
This allows python-fastimport to be embedded as vendor code within
other modules.
This patch has been accepted upstream.
| author | Roy Marples <roy@marples.name> |
|---|---|
| date | Tue, 19 Jan 2021 23:00:01 +0000 |
| parents | 28704a2a7461 |
| children |
| rev | line source |
|---|---|
| 86 | 1 # Copyright (C) 2009 Canonical Ltd |
| 2 # | |
| 3 # This program is free software; you can redistribute it and/or modify | |
| 4 # it under the terms of the GNU General Public License as published by | |
| 5 # the Free Software Foundation; either version 2 of the License, or | |
| 6 # (at your option) any later version. | |
| 7 # | |
| 8 # This program is distributed in the hope that it will be useful, | |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 11 # GNU General Public License for more details. | |
| 12 # | |
| 13 # You should have received a copy of the GNU General Public License | |
| 14 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| 15 | |
| 16 """Import processor that filters the input (and doesn't import).""" | |
|
88
2fc99e3479d9
python-fastimport: Import our own modules using relative pathing
Roy Marples <roy@marples.name>
parents:
86
diff
changeset
|
17 from .. import ( |
| 86 | 18 commands, |
| 19 helpers, | |
| 20 processor, | |
| 21 ) | |
| 22 import stat | |
| 23 | |
| 24 | |
| 25 class FilterProcessor(processor.ImportProcessor): | |
| 26 """An import processor that filters the input to include/exclude objects. | |
| 27 | |
| 28 No changes to the current repository are made. | |
| 29 | |
| 30 Here are the supported parameters: | |
| 31 | |
| 32 * include_paths - a list of paths that commits must change in order to | |
| 33 be kept in the output stream | |
| 34 | |
| 35 * exclude_paths - a list of paths that should not appear in the output | |
| 36 stream | |
| 37 | |
| 38 * squash_empty_commits - if set to True (the default), squash commits | |
| 39 that don't have any changes after the filter has been applied | |
| 40 """ | |
| 41 | |
| 42 known_params = [ | |
| 43 b'include_paths', | |
| 44 b'exclude_paths', | |
| 45 b'squash_empty_commits' | |
| 46 ] | |
| 47 | |
| 48 def pre_process(self): | |
| 49 self.includes = self.params.get(b'include_paths') | |
| 50 self.excludes = self.params.get(b'exclude_paths') | |
| 51 self.squash_empty_commits = bool( | |
| 52 self.params.get(b'squash_empty_commits', True)) | |
| 53 # What's the new root, if any | |
| 54 self.new_root = helpers.common_directory(self.includes) | |
| 55 # Buffer of blobs until we know we need them: mark -> cmd | |
| 56 self.blobs = {} | |
| 57 # These are the commits we've squashed so far | |
| 58 self.squashed_commits = set() | |
| 59 # Map of commit-id to list of parents | |
| 60 self.parents = {} | |
| 61 | |
| 62 def pre_handler(self, cmd): | |
| 63 self.command = cmd | |
| 64 # Should this command be included in the output or not? | |
| 65 self.keep = False | |
| 66 # Blobs to dump into the output before dumping the command itself | |
| 67 self.referenced_blobs = [] | |
| 68 | |
| 69 def post_handler(self, cmd): | |
| 70 if not self.keep: | |
| 71 return | |
| 72 # print referenced blobs and the command | |
| 73 for blob_id in self.referenced_blobs: | |
| 74 self._print_command(self.blobs[blob_id]) | |
| 75 self._print_command(self.command) | |
| 76 | |
| 77 def progress_handler(self, cmd): | |
| 78 """Process a ProgressCommand.""" | |
| 79 # These always pass through | |
| 80 self.keep = True | |
| 81 | |
| 82 def blob_handler(self, cmd): | |
| 83 """Process a BlobCommand.""" | |
| 84 # These never pass through directly. We buffer them and only | |
| 85 # output them if referenced by an interesting command. | |
| 86 self.blobs[cmd.id] = cmd | |
| 87 self.keep = False | |
| 88 | |
| 89 def checkpoint_handler(self, cmd): | |
| 90 """Process a CheckpointCommand.""" | |
| 91 # These always pass through | |
| 92 self.keep = True | |
| 93 | |
| 94 def commit_handler(self, cmd): | |
| 95 """Process a CommitCommand.""" | |
| 96 # These pass through if they meet the filtering conditions | |
| 97 interesting_filecmds = self._filter_filecommands(cmd.iter_files) | |
| 98 if interesting_filecmds or not self.squash_empty_commits: | |
| 99 # If all we have is a single deleteall, skip this commit | |
| 100 if len(interesting_filecmds) == 1 and isinstance( | |
| 101 interesting_filecmds[0], commands.FileDeleteAllCommand): | |
| 102 pass | |
| 103 else: | |
| 104 # Remember just the interesting file commands | |
| 105 self.keep = True | |
| 106 cmd.file_iter = iter(interesting_filecmds) | |
| 107 | |
| 108 # Record the referenced blobs | |
| 109 for fc in interesting_filecmds: | |
| 110 if isinstance(fc, commands.FileModifyCommand): | |
| 111 if (fc.dataref is not None and | |
| 112 not stat.S_ISDIR(fc.mode)): | |
| 113 self.referenced_blobs.append(fc.dataref) | |
| 114 | |
| 115 # Update from and merges to refer to commits in the output | |
| 116 cmd.from_ = self._find_interesting_from(cmd.from_) | |
| 117 cmd.merges = self._find_interesting_merges(cmd.merges) | |
| 118 else: | |
| 119 self.squashed_commits.add(cmd.id) | |
| 120 | |
| 121 # Keep track of the parents | |
| 122 if cmd.from_ and cmd.merges: | |
| 123 parents = [cmd.from_] + cmd.merges | |
| 124 elif cmd.from_: | |
| 125 parents = [cmd.from_] | |
| 126 else: | |
| 127 parents = None | |
| 128 if cmd.mark is not None: | |
| 129 self.parents[b':' + cmd.mark] = parents | |
| 130 | |
| 131 def reset_handler(self, cmd): | |
| 132 """Process a ResetCommand.""" | |
| 133 if cmd.from_ is None: | |
| 134 # We pass through resets that init a branch because we have to | |
| 135 # assume the branch might be interesting. | |
| 136 self.keep = True | |
| 137 else: | |
| 138 # Keep resets if they indirectly reference something we kept | |
| 139 cmd.from_ = self._find_interesting_from(cmd.from_) | |
| 140 self.keep = cmd.from_ is not None | |
| 141 | |
| 142 def tag_handler(self, cmd): | |
| 143 """Process a TagCommand.""" | |
| 144 # Keep tags if they indirectly reference something we kept | |
| 145 cmd.from_ = self._find_interesting_from(cmd.from_) | |
| 146 self.keep = cmd.from_ is not None | |
| 147 | |
| 148 def feature_handler(self, cmd): | |
| 149 """Process a FeatureCommand.""" | |
| 150 feature = cmd.feature_name | |
| 151 if feature not in commands.FEATURE_NAMES: | |
| 152 self.warning("feature %s is not supported - parsing may fail" | |
| 153 % (feature,)) | |
| 154 # These always pass through | |
| 155 self.keep = True | |
| 156 | |
| 157 def _print_command(self, cmd): | |
| 158 """Wrapper to avoid adding unnecessary blank lines.""" | |
| 159 text = helpers.repr_bytes(cmd) | |
| 160 self.outf.write(text) | |
| 161 if not text.endswith(b'\n'): | |
| 162 self.outf.write(b'\n') | |
| 163 | |
| 164 def _filter_filecommands(self, filecmd_iter): | |
| 165 """Return the filecommands filtered by includes & excludes. | |
| 166 | |
| 167 :return: a list of FileCommand objects | |
| 168 """ | |
| 169 if self.includes is None and self.excludes is None: | |
| 170 return list(filecmd_iter()) | |
| 171 | |
| 172 # Do the filtering, adjusting for the new_root | |
| 173 result = [] | |
| 174 for fc in filecmd_iter(): | |
| 175 if (isinstance(fc, commands.FileModifyCommand) or | |
| 176 isinstance(fc, commands.FileDeleteCommand)): | |
| 177 if self._path_to_be_kept(fc.path): | |
| 178 fc.path = self._adjust_for_new_root(fc.path) | |
| 179 else: | |
| 180 continue | |
| 181 elif isinstance(fc, commands.FileDeleteAllCommand): | |
| 182 pass | |
| 183 elif isinstance(fc, commands.FileRenameCommand): | |
| 184 fc = self._convert_rename(fc) | |
| 185 elif isinstance(fc, commands.FileCopyCommand): | |
| 186 fc = self._convert_copy(fc) | |
| 187 else: | |
| 188 self.warning("cannot handle FileCommands of class %s - ignoring", | |
| 189 fc.__class__) | |
| 190 continue | |
| 191 if fc is not None: | |
| 192 result.append(fc) | |
| 193 return result | |
| 194 | |
| 195 def _path_to_be_kept(self, path): | |
| 196 """Does the given path pass the filtering criteria?""" | |
| 197 if self.excludes and (path in self.excludes | |
| 198 or helpers.is_inside_any(self.excludes, path)): | |
| 199 return False | |
| 200 if self.includes: | |
| 201 return (path in self.includes | |
| 202 or helpers.is_inside_any(self.includes, path)) | |
| 203 return True | |
| 204 | |
| 205 def _adjust_for_new_root(self, path): | |
| 206 """Adjust a path given the new root directory of the output.""" | |
| 207 if self.new_root is None: | |
| 208 return path | |
| 209 elif path.startswith(self.new_root): | |
| 210 return path[len(self.new_root):] | |
| 211 else: | |
| 212 return path | |
| 213 | |
| 214 def _find_interesting_parent(self, commit_ref): | |
| 215 while True: | |
| 216 if commit_ref not in self.squashed_commits: | |
| 217 return commit_ref | |
| 218 parents = self.parents.get(commit_ref) | |
| 219 if not parents: | |
| 220 return None | |
| 221 commit_ref = parents[0] | |
| 222 | |
| 223 def _find_interesting_from(self, commit_ref): | |
| 224 if commit_ref is None: | |
| 225 return None | |
| 226 return self._find_interesting_parent(commit_ref) | |
| 227 | |
| 228 def _find_interesting_merges(self, commit_refs): | |
| 229 if commit_refs is None: | |
| 230 return None | |
| 231 merges = [] | |
| 232 for commit_ref in commit_refs: | |
| 233 parent = self._find_interesting_parent(commit_ref) | |
| 234 if parent is not None: | |
| 235 merges.append(parent) | |
| 236 if merges: | |
| 237 return merges | |
| 238 else: | |
| 239 return None | |
| 240 | |
| 241 def _convert_rename(self, fc): | |
| 242 """Convert a FileRenameCommand into a new FileCommand. | |
| 243 | |
| 244 :return: None if the rename is being ignored, otherwise a | |
| 245 new FileCommand based on whether the old and new paths | |
| 246 are inside or outside of the interesting locations. | |
| 247 """ | |
| 248 old = fc.old_path | |
| 249 new = fc.new_path | |
| 250 keep_old = self._path_to_be_kept(old) | |
| 251 keep_new = self._path_to_be_kept(new) | |
| 252 if keep_old and keep_new: | |
| 253 fc.old_path = self._adjust_for_new_root(old) | |
| 254 fc.new_path = self._adjust_for_new_root(new) | |
| 255 return fc | |
| 256 elif keep_old: | |
| 257 # The file has been renamed to a non-interesting location. | |
| 258 # Delete it! | |
| 259 old = self._adjust_for_new_root(old) | |
| 260 return commands.FileDeleteCommand(old) | |
| 261 elif keep_new: | |
| 262 # The file has been renamed into an interesting location | |
| 263 # We really ought to add it but we don't currently buffer | |
| 264 # the contents of all previous files and probably never want | |
| 265 # to. Maybe fast-import-info needs to be extended to | |
| 266 # remember all renames and a config file can be passed | |
| 267 # into here ala fast-import? | |
| 268 self.warning("cannot turn rename of %s into an add of %s yet" % | |
| 269 (old, new)) | |
| 270 return None | |
| 271 | |
| 272 def _convert_copy(self, fc): | |
| 273 """Convert a FileCopyCommand into a new FileCommand. | |
| 274 | |
| 275 :return: None if the copy is being ignored, otherwise a | |
| 276 new FileCommand based on whether the source and destination | |
| 277 paths are inside or outside of the interesting locations. | |
| 278 """ | |
| 279 src = fc.src_path | |
| 280 dest = fc.dest_path | |
| 281 keep_src = self._path_to_be_kept(src) | |
| 282 keep_dest = self._path_to_be_kept(dest) | |
| 283 if keep_src and keep_dest: | |
| 284 fc.src_path = self._adjust_for_new_root(src) | |
| 285 fc.dest_path = self._adjust_for_new_root(dest) | |
| 286 return fc | |
| 287 elif keep_src: | |
| 288 # The file has been copied to a non-interesting location. | |
| 289 # Ignore it! | |
| 290 return None | |
| 291 elif keep_dest: | |
| 292 # The file has been copied into an interesting location | |
| 293 # We really ought to add it but we don't currently buffer | |
| 294 # the contents of all previous files and probably never want | |
| 295 # to. Maybe fast-import-info needs to be extended to | |
| 296 # remember all copies and a config file can be passed | |
| 297 # into here ala fast-import? | |
| 298 self.warning("cannot turn copy of %s into an add of %s yet" % | |
| 299 (src, dest)) | |
| 300 return None |
