comparison hgext3rd/fastimport/vendor/python_fastimport/processors/filter_processor.py @ 86:28704a2a7461 vendor/python-fastimport

Import python-fastimport-0.9.8
author Roy Marples <roy@marples.name>
date Tue, 19 Jan 2021 22:56:34 +0000
parents
children 2fc99e3479d9
comparison
equal deleted inserted replaced
85:1f5544a8870b 86:28704a2a7461
1 # Copyright (C) 2009 Canonical Ltd
2 #
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
16 """Import processor that filters the input (and doesn't import)."""
17 from fastimport import (
18 commands,
19 helpers,
20 processor,
21 )
22 import stat
23
24
class FilterProcessor(processor.ImportProcessor):
    """An import processor that filters the input to include/exclude objects.

    No changes to the current repository are made.

    Here are the supported parameters:

    * include_paths - a list of paths that commits must change in order to
      be kept in the output stream

    * exclude_paths - a list of paths that should not appear in the output
      stream

    * squash_empty_commits - if true (the default), commits that don't have
      any changes left after the filter has been applied are squashed, i.e.
      dropped from the output; set it to False to keep them
    """

    known_params = [
        b'include_paths',
        b'exclude_paths',
        b'squash_empty_commits'
    ]

    def pre_process(self):
        """Read the parameters and reset the per-stream bookkeeping."""
        self.includes = self.params.get(b'include_paths')
        self.excludes = self.params.get(b'exclude_paths')
        self.squash_empty_commits = bool(
            self.params.get(b'squash_empty_commits', True))
        # What's the new root, if any
        self.new_root = helpers.common_directory(self.includes)
        # Buffer of blobs until we know we need them: mark -> cmd
        self.blobs = {}
        # These are the commits we've dropped from the output so far
        self.squashed_commits = set()
        # Map of commit-id to list of parents
        self.parents = {}

    def pre_handler(self, cmd):
        """Reset the per-command state before a command is handled."""
        self.command = cmd
        # Should this command be included in the output or not?
        self.keep = False
        # Blobs to dump into the output before dumping the command itself
        self.referenced_blobs = []

    def post_handler(self, cmd):
        """Write the command, preceded by its blobs, if it is being kept."""
        if not self.keep:
            return
        # print referenced blobs and the command
        for blob_id in self.referenced_blobs:
            if blob_id not in self.blobs and not blob_id.startswith(b':'):
                # Not a mark: the dataref names an object that already
                # exists in the target, so no blob was buffered for it.
                continue
            self._print_command(self.blobs[blob_id])
        self._print_command(self.command)

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # These always pass through
        self.keep = True

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # These never pass through directly. We buffer them and only
        # output them if referenced by an interesting command.
        self.blobs[cmd.id] = cmd
        self.keep = False

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # These always pass through
        self.keep = True

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is kept if any of its file commands survive the
        filtering (or if empty commits are not being squashed).  A commit
        left with nothing but a single deleteall is always dropped.  Every
        dropped commit is remembered so that later references to it can be
        redirected to an ancestor that is present in the output.
        """
        interesting_filecmds = self._filter_filecommands(cmd.iter_files)
        only_deleteall = (
            len(interesting_filecmds) == 1 and
            isinstance(interesting_filecmds[0],
                       commands.FileDeleteAllCommand))
        if only_deleteall or (
                not interesting_filecmds and self.squash_empty_commits):
            # Dropped from the output: record it so that from/merge/tag/
            # reset references are rewritten instead of left dangling.
            self.squashed_commits.add(cmd.id)
        else:
            # Remember just the interesting file commands
            self.keep = True
            cmd.file_iter = iter(interesting_filecmds)

            # Record the referenced blobs
            for fc in interesting_filecmds:
                if (isinstance(fc, commands.FileModifyCommand) and
                        fc.dataref is not None and
                        not stat.S_ISDIR(fc.mode)):
                    self.referenced_blobs.append(fc.dataref)

            # Update from and merges to refer to commits in the output
            cmd.from_ = self._find_interesting_from(cmd.from_)
            cmd.merges = self._find_interesting_merges(cmd.merges)

        # Keep track of the parents
        if cmd.from_ and cmd.merges:
            parents = [cmd.from_] + cmd.merges
        elif cmd.from_:
            parents = [cmd.from_]
        else:
            parents = None
        if cmd.mark is not None:
            self.parents[b':' + cmd.mark] = parents

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        if cmd.from_ is None:
            # We pass through resets that init a branch because we have to
            # assume the branch might be interesting.
            self.keep = True
        else:
            # Keep resets if they indirectly reference something we kept
            cmd.from_ = self._find_interesting_from(cmd.from_)
            self.keep = cmd.from_ is not None

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        # Keep tags if they indirectly reference something we kept
        cmd.from_ = self._find_interesting_from(cmd.from_)
        self.keep = cmd.from_ is not None

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            self.warning("feature %s is not supported - parsing may fail"
                         % (feature,))
        # These always pass through
        self.keep = True

    def _print_command(self, cmd):
        """Wrapper to avoid adding unnecessary blank lines."""
        text = helpers.repr_bytes(cmd)
        self.outf.write(text)
        if not text.endswith(b'\n'):
            self.outf.write(b'\n')

    def _filter_filecommands(self, filecmd_iter):
        """Return the filecommands filtered by includes & excludes.

        :param filecmd_iter: a callable returning an iterator over the
          file commands of a commit
        :return: a list of FileCommand objects
        """
        if self.includes is None and self.excludes is None:
            return list(filecmd_iter())

        # Do the filtering, adjusting for the new_root
        result = []
        for fc in filecmd_iter():
            if isinstance(fc, (commands.FileModifyCommand,
                               commands.FileDeleteCommand)):
                if not self._path_to_be_kept(fc.path):
                    continue
                fc.path = self._adjust_for_new_root(fc.path)
            elif isinstance(fc, commands.FileDeleteAllCommand):
                # Has no path, so it always survives the filtering
                pass
            elif isinstance(fc, commands.FileRenameCommand):
                fc = self._convert_rename(fc)
            elif isinstance(fc, commands.FileCopyCommand):
                fc = self._convert_copy(fc)
            else:
                self.warning(
                    "cannot handle FileCommands of class %s - ignoring"
                    % (fc.__class__,))
                continue
            # The rename/copy conversions return None for ignored commands
            if fc is not None:
                result.append(fc)
        return result

    def _path_to_be_kept(self, path):
        """Does the given path pass the filtering criteria?

        Excludes are checked first, so they win over includes.
        """
        if self.excludes and (path in self.excludes
                              or helpers.is_inside_any(self.excludes, path)):
            return False
        if self.includes:
            return (path in self.includes
                    or helpers.is_inside_any(self.includes, path))
        return True

    def _adjust_for_new_root(self, path):
        """Adjust a path given the new root directory of the output.

        :return: the path with the new_root prefix stripped, or the path
          unchanged if there is no new root or it lies outside of it
        """
        if self.new_root is None:
            return path
        elif path.startswith(self.new_root):
            return path[len(self.new_root):]
        else:
            return path

    def _find_interesting_parent(self, commit_ref):
        """Find the nearest commit at or above commit_ref that was kept.

        Only first parents are followed.

        :return: commit_ref itself if it was not squashed, otherwise the
          first non-squashed commit along its first-parent chain, or None
          if that chain runs out
        """
        while True:
            if commit_ref not in self.squashed_commits:
                return commit_ref
            parents = self.parents.get(commit_ref)
            if not parents:
                return None
            commit_ref = parents[0]

    def _find_interesting_from(self, commit_ref):
        """Map a 'from' reference onto a commit present in the output.

        :return: None if commit_ref is None or nothing interesting is found
        """
        if commit_ref is None:
            return None
        return self._find_interesting_parent(commit_ref)

    def _find_interesting_merges(self, commit_refs):
        """Map 'merge' references onto commits present in the output.

        :return: the list of interesting parents found, or None if
          commit_refs is None or nothing interesting is found
        """
        if commit_refs is None:
            return None
        candidates = [self._find_interesting_parent(commit_ref)
                      for commit_ref in commit_refs]
        merges = [parent for parent in candidates if parent is not None]
        return merges or None

    def _convert_rename(self, fc):
        """Convert a FileRenameCommand into a new FileCommand.

        :return: None if the rename is being ignored, otherwise a
          new FileCommand based on the whether the old and new paths
          are inside or outside of the interesting locations.
        """
        old = fc.old_path
        new = fc.new_path
        keep_old = self._path_to_be_kept(old)
        keep_new = self._path_to_be_kept(new)
        if keep_old and keep_new:
            fc.old_path = self._adjust_for_new_root(old)
            fc.new_path = self._adjust_for_new_root(new)
            return fc
        elif keep_old:
            # The file has been renamed to a non-interesting location.
            # Delete it!
            old = self._adjust_for_new_root(old)
            return commands.FileDeleteCommand(old)
        elif keep_new:
            # The file has been renamed into an interesting location
            # We really ought to add it but we don't currently buffer
            # the contents of all previous files and probably never want
            # to. Maybe fast-import-info needs to be extended to
            # remember all renames and a config file can be passed
            # into here ala fast-import?
            self.warning("cannot turn rename of %s into an add of %s yet" %
                         (old, new))
        return None

    def _convert_copy(self, fc):
        """Convert a FileCopyCommand into a new FileCommand.

        :return: None if the copy is being ignored, otherwise a
          new FileCommand based on the whether the source and destination
          paths are inside or outside of the interesting locations.
        """
        src = fc.src_path
        dest = fc.dest_path
        keep_src = self._path_to_be_kept(src)
        keep_dest = self._path_to_be_kept(dest)
        if keep_src and keep_dest:
            fc.src_path = self._adjust_for_new_root(src)
            fc.dest_path = self._adjust_for_new_root(dest)
            return fc
        elif keep_src:
            # The file has been copied to a non-interesting location.
            # Ignore it!
            return None
        elif keep_dest:
            # The file has been copied into an interesting location
            # We really ought to add it but we don't currently buffer
            # the contents of all previous files and probably never want
            # to. Maybe fast-import-info needs to be extended to
            # remember all copies and a config file can be passed
            # into here ala fast-import?
            self.warning("cannot turn copy of %s into an add of %s yet" %
                         (src, dest))
        return None