Kiln » Dependencies » Dulwich Read More
Clone URL:  
Pushed to one repository · View In Graph Contained in master

Actually create deltas when creating packs.

Changeset 9ff047a59ffa

Parent 65b638efb980

by Jelmer Vernooij

Changes to 3 files · Browse files at 9ff047a59ffa Showing diff from parent 65b638efb980 Diff from another changeset...

Change 1 of 1 Show Entire File NEWS Stacked
 
12
13
14
 
 
15
16
17
 
12
13
14
15
16
17
18
19
@@ -12,6 +12,8 @@
  * Fix use of SubprocessWrapper on Windows. (Paulo Madeira, #670035)     * Fix compilation on newer versions of Mac OS X (Lion and up). (Ryan McKern, #794543) + + * Actually create deltas when creating packs. (Jelmer Vernooij, #562673)     API CHANGES  
 
271
272
273
274
 
275
276
277
 
271
272
273
 
274
275
276
277
@@ -271,7 +271,7 @@
  objects = set()   for sha in self._iter_loose_objects():   objects.add((self._get_loose_object(sha), None)) - self.add_objects(objects) + self.add_objects(list(objects))   for obj, path in objects:   self._remove_loose_object(obj.id)   return len(objects)
Change 1 of 3 Show Entire File dulwich/​pack.py Stacked
 
1094
1095
1096
1097
1098
1099
1100
 
1121
1122
1123
1124
 
1125
1126
1127
 
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
 
 
 
 
 
 
 
 
1189
1190
1191
1192
1193
1194
 
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
1211
1212
 
1094
1095
1096
 
1097
1098
1099
 
1120
1121
1122
 
1123
1124
1125
1126
 
1172
1173
1174
 
1175
 
 
 
 
 
 
 
 
 
 
 
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
 
1189
1190
 
1191
 
 
 
 
 
 
 
 
 
 
 
 
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
@@ -1094,7 +1094,6 @@
  :param object: Object to write   :return: Tuple with offset at which the object was written, and crc32   """ - offset = f.tell()   packed_data_hdr = ""   if type == OFS_DELTA:   (delta_base_offset, object) = object @@ -1121,7 +1120,7 @@
  packed_data_hdr += basename   packed_data = packed_data_hdr + zlib.compress(object)   f.write(packed_data) - return (offset, (zlib.crc32(packed_data) & 0xffffffff)) + return (zlib.crc32(packed_data) & 0xffffffff)      def write_pack(filename, objects, num_objects=None): @@ -1173,40 +1172,39 @@
  else:   num_objects = len(objects)   - # FIXME: Somehow limit delta depth   # FIXME: Make thin-pack optional (its not used when cloning a pack) - # # Build a list of objects ordered by the magic Linus heuristic - # # This helps us find good objects to diff against us - # magic = [] - # for obj, path in objects: - # magic.append( (obj.type_num, path, 1, -obj.raw_length(), obj) ) - # magic.sort() - # # Build a map of objects and their index in magic - so we can find - # # preceeding objects to diff against - # offs = {} - # for i in range(len(magic)): - # offs[magic[i][4]] = i + # Build a list of objects ordered by the magic Linus heuristic + # This helps us find good objects to diff against us + magic = [] + for obj, path in objects: + magic.append((obj.type_num, path, -obj.raw_length(), obj)) + magic.sort() + + possible_bases = deque()     # Write the pack   entries = []   f = SHA1Writer(f)   write_pack_header(f, num_objects) - for o, path in objects: + for type_num, path, neg_length, o in magic:   sha1 = o.sha().digest() - orig_t = o.type_num   raw = o.as_raw_string() - winner = raw - t = orig_t - #for i in range(offs[o]-window, window): - # if i < 0 or i >= len(offs): continue - # b = magic[i][4] - # if b.type_num != orig_t: continue - # base = b.as_raw_string() - # delta = create_delta(base, raw) - # if len(delta) < len(winner): - # winner = delta - # t = 6 if magic[i][2] == 1 else 7 - offset, crc32 = write_pack_object(f, t, winner) + winner = (type_num, raw) + for base, base_offset in possible_bases: + if base.type_num != type_num: + continue + delta = create_delta(base.as_raw_string(), raw) + if len(delta) < len(winner): + base_id = base.sha().digest() + assert base_offset is not None + winner = (OFS_DELTA, (base_offset, delta)) + # t = REF_DELTA + # winner = (base_id, delta) + offset = f.tell() + possible_bases.appendleft((o, offset)) + if len(possible_bases) > window: + possible_bases.pop() + crc32 = write_pack_object(f, winner[0], winner[1])   entries.append((sha1, offset, crc32))   return entries, f.write_sha()