Kiln » Dependencies » Dulwich Read More
Clone URL:  
Pushed to one repository · View In Graph Contained in master

Separate out delta generation from pack writing.

Changeset e1e67c56ddcd

Parent 6345f5ac4ff2

by Jelmer Vernooij

Changes to 5 files · Browse files at e1e67c56ddcd Showing diff from parent 6345f5ac4ff2 Diff from another changeset...

Change 1 of 2 Show Entire File dulwich/client.py Stacked
 
38
39
40
41
 
42
43
44
 
184
185
186
187
 
188
189
190
 
38
39
40
 
41
42
43
44
 
184
185
186
 
187
188
189
190
@@ -38,7 +38,7 @@
  extract_capabilities,   )  from dulwich.pack import ( - write_pack_data, + write_pack_objects,   )     @@ -184,7 +184,7 @@
  if not want:   return new_refs   objects = generate_pack_contents(have, want) - entries, sha = write_pack_data(proto.write_file(), objects) + entries, sha = write_pack_objects(proto.write_file(), objects)     if 'report-status' in self._send_capabilities:   self._parse_status_report(proto)
 
54
55
56
57
58
 
59
60
61
 
321
322
323
324
 
325
326
327
 
54
55
56
 
57
58
59
60
61
 
321
322
323
 
324
325
326
327
@@ -54,8 +54,8 @@
  iter_sha1,   load_pack_index,   write_pack, - write_pack_data,   write_pack_index_v2, + write_pack_objects,   )    INFODIR = 'info' @@ -321,7 +321,7 @@
  # Don't bother writing an empty pack file   return   f, commit = self.add_pack() - write_pack_data(f, objects) + write_pack_objects(f, objects)   return commit()    
Change 1 of 5 Show Entire File dulwich/pack.py Stacked
 
1065
1066
1067
 
1068
1069
1070
1071
1072
 
1073
1074
1075
1076
1077
 
1078
1079
1080
1081
1082
1083
 
 
 
1084
1085
1086
 
1136
1137
1138
1139
 
 
1140
1141
1142
 
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
 
 
 
 
 
 
1176
1177
1178
 
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
 
 
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
 
 
 
1205
1206
1207
1208
1209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
1211
1212
 
1510
1511
1512
1513
 
1514
1515
1516
 
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
 
1142
1143
1144
 
1145
1146
1147
1148
1149
 
1162
1163
1164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
1166
1167
1168
1169
1170
1171
1172
1173
 
1177
1178
1179
 
 
 
 
1180
1181
 
1182
1183
1184
1185
1186
1187
1188
 
 
 
 
 
 
 
 
1189
1190
1191
1192
1193
1194
 
 
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
 
1542
1543
1544
 
1545
1546
1547
1548
@@ -1065,22 +1065,28 @@
    def __init__(self, f):   self.f = f + self.length = 0   self.sha1 = make_sha("")     def write(self, data):   self.sha1.update(data)   self.f.write(data) + self.length += len(data)     def write_sha(self):   sha = self.sha1.digest()   assert len(sha) == 20   self.f.write(sha) + self.length += len(sha)   return sha     def close(self):   sha = self.write_sha()   self.f.close()   return sha + + def offset(self): + return self.length     def tell(self):   return self.f.tell() @@ -1136,7 +1142,8 @@
  DeprecationWarning)   f = GitFile(filename + ".pack", 'wb')   try: - entries, data_sum = write_pack_data(f, objects, num_objects=num_objects) + entries, data_sum = write_pack_objects(f, objects, + num_objects=num_objects)   finally:   f.close()   entries = [(k, v[0], v[1]) for (k, v) in entries.iteritems()] @@ -1155,24 +1162,12 @@
  f.write(struct.pack('>L', num_objects)) # Number of objects in pack     -def write_pack_data(f, objects, num_objects=None, window=10): - """Write a new pack data file. - - :param f: File to write to - :param objects: Iterable of (object, path) tuples to write. - Should provide __len__ - :param window: Sliding window size for searching for deltas; currently - unimplemented - :return: Dict mapping id -> (offset, crc32 checksum), pack checksum - """ - if num_objects is not None: - warnings.warn("num_objects argument to write_pack_data is deprecated", - DeprecationWarning) - # Previously it was possible to pass in an iterable - objects = list(objects) - else: - num_objects = len(objects) - +def deltify_pack_objects(objects, window=10): + """Generate deltas for pack objects. + + :param objects: Objects to deltify + :param window: Window size + """   # Build a list of objects ordered by the magic Linus heuristic   # This helps us find good objects to diff against us   magic = [] @@ -1182,31 +1177,68 @@
    possible_bases = deque()   - # Write the pack - entries = {} - f = SHA1Writer(f) - write_pack_header(f, num_objects)   for type_num, path, neg_length, o in magic:   raw = o.as_raw_string() - winner = (type_num, raw) + winner = raw + winner_base = None   for base in possible_bases:   if base.type_num != type_num:   continue   delta = create_delta(base.as_raw_string(), raw)   if len(delta) < len(winner): - base_id = base.sha().digest() - try: - base_offset, base_crc32 = entries[base_id] - except KeyError: - winner = (OFS_DELTA, (base_offset, delta)) - else: - winner = (REF_DELTA, (base_id, delta)) - offset = f.tell() + winner_base = base.sha().digest() + winner = delta + yield type_num, o.sha().digest(), winner_base, winner   possible_bases.appendleft(o)   while len(possible_bases) > window:   possible_bases.pop() - crc32 = write_pack_object(f, winner[0], winner[1]) - entries[o.sha().digest()] = (offset, crc32) + + +def write_pack_objects(f, objects, window=10, num_objects=None): + """Write a new pack data file. + + :param f: File to write to + :param objects: Iterable of (object, path) tuples to write. + Should provide __len__ + :param window: Sliding window size for searching for deltas; currently + unimplemented + :param num_objects: Number of objects (do not use, deprecated) + :return: Dict mapping id -> (offset, crc32 checksum), pack checksum + """ + if num_objects is None: + num_objects = len(objects) + # FIXME: pack_contents = deltify_pack_objects(objects, window) + pack_contents = ( + (o.type_num, o.sha().digest(), None, o.as_raw_string()) + for (o, path) in objects) + return write_pack_data(f, num_objects, pack_contents) + + +def write_pack_data(f, num_records, records): + """Write a new pack data file. 
+ + :param f: File to write to + :param num_records: Number of records + :param records: Iterator over type_num, object_id, delta_base, raw + :return: Dict mapping id -> (offset, crc32 checksum), pack checksum + """ + # Write the pack + entries = {} + f = SHA1Writer(f) + write_pack_header(f, num_records) + for type_num, object_id, delta_base, raw in records: + if delta_base is not None: + try: + base_offset, base_crc32 = entries[delta_base] + except KeyError: + type_num = REF_DELTA + raw = (delta_base, raw) + else: + type_num = OFS_DELTA + raw = (base_offset, raw) + offset = f.offset() + crc32 = write_pack_object(f, type_num, raw) + entries[object_id] = (offset, crc32)   return entries, f.write_sha()     @@ -1510,7 +1542,7 @@
  *self.data.resolve_object(offset, type, obj))     def pack_tuples(self): - """Provide an iterable for use with write_pack_data. + """Provide an iterable for use with write_pack_objects.     :return: Object that can iterate over (object, path) tuples   and provides __len__
Change 1 of 2 Show Entire File dulwich/server.py Stacked
 
45
46
47
48
 
49
50
51
 
280
281
282
283
 
284
285
286
 
45
46
47
 
48
49
50
51
 
280
281
282
 
283
284
285
286
@@ -45,7 +45,7 @@
  )  from dulwich.pack import (   PackStreamReader, - write_pack_data, + write_pack_objects,   )  from dulwich.protocol import (   BufferedPktLineWriter, @@ -280,7 +280,7 @@
    self.progress("dul-daemon says what\n")   self.progress("counting objects: %d, done.\n" % len(objects_iter)) - write_pack_data(ProtocolFile(None, write), objects_iter) + write_pack_objects(ProtocolFile(None, write), objects_iter)   self.progress("how was that, then?\n")   # we are done   self.proto.write("0000")
 
42
43
44
45
 
46
47
48
 
226
227
228
229
 
230
231
232
233
234
235
236
 
237
238
239
 
42
43
44
 
45
46
47
48
 
226
227
228
 
229
230
231
232
233
234
235
 
236
237
238
239
@@ -42,7 +42,7 @@
  tree_lookup_path,   )  from dulwich.pack import ( - write_pack_data, + write_pack_objects,   )  from dulwich.tests import (   TestCase, @@ -226,14 +226,14 @@
  o = DiskObjectStore(self.store_dir)   f, commit = o.add_pack()   b = make_object(Blob, data="more yummy data") - write_pack_data(f, [(b, None)]) + write_pack_objects(f, [(b, None)])   commit()     def test_add_thin_pack(self):   o = DiskObjectStore(self.store_dir)   f, commit = o.add_thin_pack()   b = make_object(Blob, data="more yummy data") - write_pack_data(f, [(b, None)]) + write_pack_objects(f, [(b, None)])   commit()