|
86
|
1 # Copyright (C) 2009 Canonical Ltd |
|
|
2 # |
|
|
3 # This program is free software; you can redistribute it and/or modify |
|
|
4 # it under the terms of the GNU General Public License as published by |
|
|
5 # the Free Software Foundation; either version 2 of the License, or |
|
|
6 # (at your option) any later version. |
|
|
7 # |
|
|
8 # This program is distributed in the hope that it will be useful, |
|
|
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
11 # GNU General Public License for more details. |
|
|
12 # |
|
|
13 # You should have received a copy of the GNU General Public License |
|
|
14 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
|
15 |
|
|
16 """Import processor that filters the input (and doesn't import).""" |
|
|
17 from fastimport import ( |
|
|
18 commands, |
|
|
19 helpers, |
|
|
20 processor, |
|
|
21 ) |
|
|
22 import stat |
|
|
23 |
|
|
24 |
|
|
class FilterProcessor(processor.ImportProcessor):
    """An import processor that filters the input to include/exclude objects.

    No changes to the current repository are made.

    Here are the supported parameters:

    * include_paths - a list of paths that commits must change in order to
      be kept in the output stream

    * exclude_paths - a list of paths that should not appear in the output
      stream

    * squash_empty_commits - if set to True (the default), squash commits
      that don't have any changes after the filter has been applied; if set
      to False, such commits are kept in the output stream
    """

    known_params = [
        b'include_paths',
        b'exclude_paths',
        b'squash_empty_commits',
    ]

    def pre_process(self):
        """Read the filter parameters and initialise the bookkeeping state."""
        self.includes = self.params.get(b'include_paths')
        self.excludes = self.params.get(b'exclude_paths')
        self.squash_empty_commits = bool(
            self.params.get(b'squash_empty_commits', True))
        # What's the new root, if any
        self.new_root = helpers.common_directory(self.includes)
        # Buffer of blobs until we know we need them: mark -> cmd
        self.blobs = {}
        # These are the commits we've dropped from the output so far
        self.squashed_commits = set()
        # Map of commit-id to list of parents
        self.parents = {}

    def pre_handler(self, cmd):
        """Reset the per-command state before *cmd* is handled."""
        self.command = cmd
        # Should this command be included in the output or not?
        self.keep = False
        # Blobs to dump into the output before dumping the command itself
        self.referenced_blobs = []

    def post_handler(self, cmd):
        """Write the current command, preceded by its blobs, if it is kept."""
        if not self.keep:
            return
        # print referenced blobs and the command
        for blob_id in self.referenced_blobs:
            self._print_command(self.blobs[blob_id])
        self._print_command(self.command)

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # These always pass through
        self.keep = True

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # These never pass through directly. We buffer them and only
        # output them if referenced by an interesting command.
        self.blobs[cmd.id] = cmd
        self.keep = False

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # These always pass through
        self.keep = True

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit passes through if it still has file commands after
        filtering, or if empty commits are not being squashed.  A commit
        reduced to a lone deleteall is always dropped.  Every dropped commit
        is recorded in ``self.squashed_commits`` so that later references to
        it can be redirected to an ancestor that is kept.
        """
        interesting_filecmds = self._filter_filecommands(cmd.iter_files)
        only_deleteall = (
            len(interesting_filecmds) == 1 and
            isinstance(interesting_filecmds[0],
                       commands.FileDeleteAllCommand))
        empty_and_squashable = (
            not interesting_filecmds and self.squash_empty_commits)

        if only_deleteall or empty_and_squashable:
            # The commit is not emitted.  Record it (including the
            # deleteall-only case) so that _find_interesting_parent() skips
            # over it instead of handing out a reference to a commit that
            # never reaches the output stream.
            self.squashed_commits.add(cmd.id)
        else:
            # Remember just the interesting file commands
            self.keep = True
            cmd.file_iter = iter(interesting_filecmds)

            # Record the referenced blobs; directories and inline data
            # (dataref of None) have no buffered blob to emit.
            for fc in interesting_filecmds:
                if not isinstance(fc, commands.FileModifyCommand):
                    continue
                if fc.dataref is not None and not stat.S_ISDIR(fc.mode):
                    self.referenced_blobs.append(fc.dataref)

            # Update from and merges to refer to commits in the output
            cmd.from_ = self._find_interesting_from(cmd.from_)
            cmd.merges = self._find_interesting_merges(cmd.merges)

        # Keep track of the parents, for kept and dropped commits alike,
        # so chains of dropped commits can be walked later.
        if cmd.from_ and cmd.merges:
            parents = [cmd.from_] + cmd.merges
        elif cmd.from_:
            parents = [cmd.from_]
        else:
            parents = None
        if cmd.mark is not None:
            self.parents[b':' + cmd.mark] = parents

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        if cmd.from_ is None:
            # We pass through resets that init a branch because we have to
            # assume the branch might be interesting.
            self.keep = True
        else:
            # Keep resets if they indirectly reference something we kept
            cmd.from_ = self._find_interesting_from(cmd.from_)
            self.keep = cmd.from_ is not None

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        # Keep tags if they indirectly reference something we kept
        cmd.from_ = self._find_interesting_from(cmd.from_)
        self.keep = cmd.from_ is not None

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            self.warning("feature %s is not supported - parsing may fail"
                         % (feature,))
        # These always pass through
        self.keep = True

    def _print_command(self, cmd):
        """Wrapper to avoid adding unnecessary blank lines."""
        text = helpers.repr_bytes(cmd)
        self.outf.write(text)
        if not text.endswith(b'\n'):
            self.outf.write(b'\n')

    def _filter_filecommands(self, filecmd_iter):
        """Return the filecommands filtered by includes & excludes.

        Paths of the commands that are kept are rewritten relative to
        ``self.new_root``.

        :param filecmd_iter: a callable returning an iterable of
            FileCommand objects
        :return: a list of FileCommand objects
        """
        if self.includes is None and self.excludes is None:
            return list(filecmd_iter())

        # Do the filtering, adjusting for the new_root
        result = []
        for fc in filecmd_iter():
            if isinstance(fc, (commands.FileModifyCommand,
                               commands.FileDeleteCommand)):
                if not self._path_to_be_kept(fc.path):
                    continue
                fc.path = self._adjust_for_new_root(fc.path)
            elif isinstance(fc, commands.FileDeleteAllCommand):
                # deleteall carries no path, so it is kept unchanged
                pass
            elif isinstance(fc, commands.FileRenameCommand):
                fc = self._convert_rename(fc)
            elif isinstance(fc, commands.FileCopyCommand):
                fc = self._convert_copy(fc)
            else:
                self.warning(
                    "cannot handle FileCommands of class %s - ignoring",
                    fc.__class__)
                continue
            # The rename/copy converters return None for ignored commands
            if fc is not None:
                result.append(fc)
        return result

    def _path_to_be_kept(self, path):
        """Does the given path pass the filtering criteria?

        Excludes are checked first, so they take precedence over includes.
        """
        if self.excludes and (path in self.excludes
                              or helpers.is_inside_any(self.excludes, path)):
            return False
        if self.includes:
            return (path in self.includes
                    or helpers.is_inside_any(self.includes, path))
        return True

    def _adjust_for_new_root(self, path):
        """Adjust a path given the new root directory of the output.

        :return: the path with the ``self.new_root`` prefix removed, or the
            path unchanged when there is no new root or it is not a prefix
        """
        if self.new_root is not None and path.startswith(self.new_root):
            return path[len(self.new_root):]
        return path

    def _find_interesting_parent(self, commit_ref):
        """Follow first parents until reaching a commit that was not dropped.

        :return: the first reference on the first-parent chain that is not
            in ``self.squashed_commits``, or None if the chain runs out
        """
        while commit_ref in self.squashed_commits:
            parents = self.parents.get(commit_ref)
            if not parents:
                return None
            commit_ref = parents[0]
        return commit_ref

    def _find_interesting_from(self, commit_ref):
        """Map a ``from`` reference onto a commit present in the output.

        :return: None if commit_ref is None or no kept ancestor exists
        """
        if commit_ref is None:
            return None
        return self._find_interesting_parent(commit_ref)

    def _find_interesting_merges(self, commit_refs):
        """Map merge references onto commits present in the output.

        :return: the list of resolved references, or None if commit_refs is
            None or nothing could be resolved
        """
        if commit_refs is None:
            return None
        merges = []
        for commit_ref in commit_refs:
            parent = self._find_interesting_parent(commit_ref)
            if parent is not None:
                merges.append(parent)
        return merges or None

    def _convert_rename(self, fc):
        """Convert a FileRenameCommand into a new FileCommand.

        :return: None if the rename is being ignored, otherwise a
            new FileCommand based on the whether the old and new paths
            are inside or outside of the interesting locations.
        """
        old = fc.old_path
        new = fc.new_path
        keep_old = self._path_to_be_kept(old)
        keep_new = self._path_to_be_kept(new)
        if keep_old and keep_new:
            fc.old_path = self._adjust_for_new_root(old)
            fc.new_path = self._adjust_for_new_root(new)
            return fc
        if keep_old:
            # The file has been renamed to a non-interesting location.
            # Delete it!
            old = self._adjust_for_new_root(old)
            return commands.FileDeleteCommand(old)
        if keep_new:
            # The file has been renamed into an interesting location
            # We really ought to add it but we don't currently buffer
            # the contents of all previous files and probably never want
            # to. Maybe fast-import-info needs to be extended to
            # remember all renames and a config file can be passed
            # into here ala fast-import?
            self.warning("cannot turn rename of %s into an add of %s yet" %
                         (old, new))
        return None

    def _convert_copy(self, fc):
        """Convert a FileCopyCommand into a new FileCommand.

        :return: None if the copy is being ignored, otherwise a
            new FileCommand based on the whether the source and destination
            paths are inside or outside of the interesting locations.
        """
        src = fc.src_path
        dest = fc.dest_path
        keep_src = self._path_to_be_kept(src)
        keep_dest = self._path_to_be_kept(dest)
        if keep_src and keep_dest:
            fc.src_path = self._adjust_for_new_root(src)
            fc.dest_path = self._adjust_for_new_root(dest)
            return fc
        if keep_src:
            # The file has been copied to a non-interesting location.
            # Ignore it!
            return None
        if keep_dest:
            # The file has been copied into an interesting location
            # We really ought to add it but we don't currently buffer
            # the contents of all previous files and probably never want
            # to. Maybe fast-import-info needs to be extended to
            # remember all copies and a config file can be passed
            # into here ala fast-import?
            self.warning("cannot turn copy of %s into an add of %s yet" %
                         (src, dest))
        return None