Changeset 154a3761de2e…
by Tyler G. Hicks-Wright <tghw@fogcreek.com>
Changes to 79 files
@@ -0,0 +1,11 @@
+tests/httpserver\.(log|pid)$
+
+syntax: glob
+*.pyc
+*.pyo
+*~
+.\#*
+*.swp
+tests/*.err
+tests/tmp/
+kiln_extensions.zip
@@ -0,0 +1,318 @@
+r"""A simple, fast, extensible JSON encoder and decoder
+
+JSON (JavaScript Object Notation) <http://json.org> is a subset of
+JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
+interchange format.
+
+json exposes an API familiar to users of the standard library
+marshal and pickle modules.
+
+Encoding basic Python object hierarchies::
+
+ >>> import json
+ >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+ '["foo", {"bar": ["baz", null, 1.0, 2]}]'
+ >>> print json.dumps("\"foo\bar")
+ "\"foo\bar"
+ >>> print json.dumps(u'\u1234')
+ "\u1234"
+ >>> print json.dumps('\\')
+ "\\"
+ >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
+ {"a": 0, "b": 0, "c": 0}
+ >>> from StringIO import StringIO
+ >>> io = StringIO()
+ >>> json.dump(['streaming API'], io)
+ >>> io.getvalue()
+ '["streaming API"]'
+
+Compact encoding::
+
+ >>> import json
+ >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
+ '[1,2,3,{"4":5,"6":7}]'
+
+Pretty printing (using repr() because of extraneous whitespace in the output)::
+
+ >>> import json
+ >>> print repr(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4))
+ '{\n "4": 5, \n "6": 7\n}'
+
+Decoding JSON::
+
+ >>> import json
+ >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]')
+ [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+ >>> json.loads('"\\"foo\\bar"')
+ u'"foo\x08ar'
+ >>> from StringIO import StringIO
+ >>> io = StringIO('["streaming API"]')
+ >>> json.load(io)
+ [u'streaming API']
+
+Specializing JSON object decoding::
+
+ >>> import json
+ >>> def as_complex(dct):
+ ... if '__complex__' in dct:
+ ... return complex(dct['real'], dct['imag'])
+ ... return dct
+ ...
+ >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
+ ... object_hook=as_complex)
+ (1+2j)
+ >>> import decimal
+ >>> json.loads('1.1', parse_float=decimal.Decimal)
+ Decimal('1.1')
+
+Extending JSONEncoder::
+
+ >>> import json
+ >>> class ComplexEncoder(json.JSONEncoder):
+ ... def default(self, obj):
+ ... if isinstance(obj, complex):
+ ... return [obj.real, obj.imag]
+ ... return json.JSONEncoder.default(self, obj)
+ ...
+ >>> json.dumps(2 + 1j, cls=ComplexEncoder)
+ '[2.0, 1.0]'
+ >>> ComplexEncoder().encode(2 + 1j)
+ '[2.0, 1.0]'
+ >>> list(ComplexEncoder().iterencode(2 + 1j))
+ ['[', '2.0', ', ', '1.0', ']']
+
+
+Using json.tool from the shell to validate and
+pretty-print::
+
+ $ echo '{"json":"obj"}' | python -mjson.tool
+ {
+ "json": "obj"
+ }
+ $ echo '{ 1.2:3.4}' | python -mjson.tool
+ Expecting property name: line 1 column 2 (char 2)
+
+Note that the JSON produced by this module's default settings
+is a subset of YAML, so it may be used as a serializer for that as well.
+
+"""
+
+__version__ = '1.9'
+__all__ = [
+ 'dump', 'dumps', 'load', 'loads',
+ 'JSONDecoder', 'JSONEncoder',
+]
+
+__author__ = 'Bob Ippolito <bob@redivi.com>'
+
+from .decoder import JSONDecoder
+from .encoder import JSONEncoder
+
+_default_encoder = JSONEncoder(
+ skipkeys=False,
+ ensure_ascii=True,
+ check_circular=True,
+ allow_nan=True,
+ indent=None,
+ separators=None,
+ encoding='utf-8',
+ default=None,
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+ allow_nan=True, cls=None, indent=None, separators=None,
+ encoding='utf-8', default=None, **kw):
+ """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+ ``.write()``-supporting file-like object).
+
+ If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
+ (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+ will be skipped instead of raising a ``TypeError``.
+
+ If ``ensure_ascii`` is ``False``, then some of the chunks written to ``fp``
+ may be ``unicode`` instances, subject to normal Python ``str`` to
+ ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+ understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+ to cause an error.
+
+ If ``check_circular`` is ``False``, then the circular reference check
+ for container types will be skipped and a circular reference will
+ result in an ``OverflowError`` (or worse).
+
+ If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
+ serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+ in strict compliance with the JSON specification, instead of using the
+ JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+ If ``indent`` is a non-negative integer, then JSON array elements and object
+ members will be pretty-printed with that indent level. An indent level
+ of 0 will only insert newlines. ``None`` is the most compact representation.
+
+ If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+ then it will be used instead of the default ``(', ', ': ')`` separators.
+ ``(',', ':')`` is the most compact JSON representation.
+
+ ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+ ``default(obj)`` is a function that should return a serializable version
+ of obj or raise TypeError. The default simply raises TypeError.
+
+ To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+ ``.default()`` method to serialize additional types), specify it with
+ the ``cls`` kwarg.
+
+ """
+ # cached encoder
+ if (skipkeys is False and ensure_ascii is True and
+ check_circular is True and allow_nan is True and
+ cls is None and indent is None and separators is None and
+ encoding == 'utf-8' and default is None and not kw):
+ iterable = _default_encoder.iterencode(obj)
+ else:
+ if cls is None:
+ cls = JSONEncoder
+ iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+ check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+ separators=separators, encoding=encoding,
+ default=default, **kw).iterencode(obj)
+ # could accelerate with writelines in some versions of Python, at
+ # a debuggability cost
+ for chunk in iterable:
+ fp.write(chunk)
+
+
+def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
+ allow_nan=True, cls=None, indent=None, separators=None,
+ encoding='utf-8', default=None, **kw):
+ """Serialize ``obj`` to a JSON formatted ``str``.
+
+ If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
+ (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+ will be skipped instead of raising a ``TypeError``.
+
+ If ``ensure_ascii`` is ``False``, then the return value will be a
+ ``unicode`` instance subject to normal Python ``str`` to ``unicode``
+ coercion rules instead of being escaped to an ASCII ``str``.
+
+ If ``check_circular`` is ``False``, then the circular reference check
+ for container types will be skipped and a circular reference will
+ result in an ``OverflowError`` (or worse).
+
+ If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
+ serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
+ strict compliance with the JSON specification, instead of using the
+ JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+ If ``indent`` is a non-negative integer, then JSON array elements and
+ object members will be pretty-printed with that indent level. An indent
+ level of 0 will only insert newlines. ``None`` is the most compact
+ representation.
+
+ If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+ then it will be used instead of the default ``(', ', ': ')`` separators.
+ ``(',', ':')`` is the most compact JSON representation.
+
+ ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+ ``default(obj)`` is a function that should return a serializable version
+ of obj or raise TypeError. The default simply raises TypeError.
+
+ To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+ ``.default()`` method to serialize additional types), specify it with
+ the ``cls`` kwarg.
+
+ """
+ # cached encoder
+ if (skipkeys is False and ensure_ascii is True and
+ check_circular is True and allow_nan is True and
+ cls is None and indent is None and separators is None and
+ encoding == 'utf-8' and default is None and not kw):
+ return _default_encoder.encode(obj)
+ if cls is None:
+ cls = JSONEncoder
+ return cls(
+ skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+ check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+ separators=separators, encoding=encoding, default=default,
+ **kw).encode(obj)
+
+
+_default_decoder = JSONDecoder(encoding=None, object_hook=None)
+
+
+def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
+ parse_int=None, parse_constant=None, **kw):
+ """Deserialize ``fp`` (a ``.read()``-supporting file-like object
+ containing a JSON document) to a Python object.
+
+ If the contents of ``fp`` are encoded with an ASCII-based encoding other
+ than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
+ be specified. Encodings that are not ASCII based (such as UCS-2) are
+ not allowed; such a stream should be wrapped with
+ ``codecs.getreader(encoding)(fp)``, or simply decoded to a ``unicode``
+ object and passed to ``loads()``.
+
+ ``object_hook`` is an optional function that will be called with the
+ result of any object literal decode (a ``dict``). The return value of
+ ``object_hook`` will be used instead of the ``dict``. This feature
+ can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+ To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+ kwarg.
+
+ """
+ return loads(fp.read(),
+ encoding=encoding, cls=cls, object_hook=object_hook,
+ parse_float=parse_float, parse_int=parse_int,
+ parse_constant=parse_constant, **kw)
+
+
+def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
+ parse_int=None, parse_constant=None, **kw):
+ """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+ document) to a Python object.
+
+ If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
+ other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
+ must be specified. Encodings that are not ASCII based (such as UCS-2)
+ are not allowed and should be decoded to ``unicode`` first.
+
+ ``object_hook`` is an optional function that will be called with the
+ result of any object literal decode (a ``dict``). The return value of
+ ``object_hook`` will be used instead of the ``dict``. This feature
+ can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+ ``parse_float``, if specified, will be called with the string
+ of every JSON float to be decoded. By default this is equivalent to
+ float(num_str). This can be used to use another datatype or parser
+ for JSON floats (e.g. decimal.Decimal).
+
+ ``parse_int``, if specified, will be called with the string
+ of every JSON int to be decoded. By default this is equivalent to
+ int(num_str). This can be used to use another datatype or parser
+ for JSON integers (e.g. float).
+
+ ``parse_constant``, if specified, will be called with one of the
+ following strings: -Infinity, Infinity, NaN, null, true, false.
+ This can be used to raise an exception if invalid JSON numbers
+ are encountered.
+
+ To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+ kwarg.
+
+ """
+ if (cls is None and encoding is None and object_hook is None and
+ parse_int is None and parse_float is None and
+ parse_constant is None and not kw):
+ return _default_decoder.decode(s)
+ if cls is None:
+ cls = JSONDecoder
+ if object_hook is not None:
+ kw['object_hook'] = object_hook
+ if parse_float is not None:
+ kw['parse_float'] = parse_float
+ if parse_int is not None:
+ kw['parse_int'] = parse_int
+ if parse_constant is not None:
+ kw['parse_constant'] = parse_constant
+ return cls(encoding=encoding, **kw).decode(s)
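
The module-level ``_default_encoder`` and ``_default_decoder`` above are the fast
path: ``dumps()``/``loads()`` with all-default arguments reuse them, while any
non-default argument (such as ``indent`` or ``cls``) builds a fresh encoder or
decoder for that call. A minimal doctest-style sketch of both paths, using only
the public API defined in this file::

    >>> import json
    >>> json.dumps({'a': 1})            # all defaults: reuses _default_encoder
    '{"a": 1}'
    >>> json.dumps({'a': 1}, indent=2)  # non-default kwarg: builds a new JSONEncoder
    '{\n  "a": 1\n}'
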
@@ -0,0 +1,339 @@
+"""Implementation of JSONDecoder
+"""
+
+import re
+import sys
+
+from json.scanner import Scanner, pattern
+try:
+ from _json import scanstring as c_scanstring
+except ImportError:
+ c_scanstring = None
+
+__all__ = ['JSONDecoder']
+
+FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
+
+NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
+
+
+def linecol(doc, pos):
+ lineno = doc.count('\n', 0, pos) + 1
+ if lineno == 1:
+ colno = pos
+ else:
+ colno = pos - doc.rindex('\n', 0, pos)
+ return lineno, colno
+
+
+def errmsg(msg, doc, pos, end=None):
+ lineno, colno = linecol(doc, pos)
+ if end is None:
+ fmt = '{0}: line {1} column {2} (char {3})'
+ return fmt.format(msg, lineno, colno, pos)
+ endlineno, endcolno = linecol(doc, end)
+ fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
+ return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
+
+
+_CONSTANTS = {
+ '-Infinity': NegInf,
+ 'Infinity': PosInf,
+ 'NaN': NaN,
+ 'true': True,
+ 'false': False,
+ 'null': None,
+}
+
+
+def JSONConstant(match, context, c=_CONSTANTS):
+ s = match.group(0)
+ fn = getattr(context, 'parse_constant', None)
+ if fn is None:
+ rval = c[s]
+ else:
+ rval = fn(s)
+ return rval, None
+pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
+
+
+def JSONNumber(match, context):
+ match = JSONNumber.regex.match(match.string, *match.span())
+ integer, frac, exp = match.groups()
+ if frac or exp:
+ fn = getattr(context, 'parse_float', None) or float
+ res = fn(integer + (frac or '') + (exp or ''))
+ else:
+ fn = getattr(context, 'parse_int', None) or int
+ res = fn(integer)
+ return res, None
+pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
+
+
+STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
+BACKSLASH = {
+ '"': u'"', '\\': u'\\', '/': u'/',
+ 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
+}
+
+DEFAULT_ENCODING = "utf-8"
+
+
+def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
+ if encoding is None:
+ encoding = DEFAULT_ENCODING
+ chunks = []
+ _append = chunks.append
+ begin = end - 1
+ while 1:
+ chunk = _m(s, end)
+ if chunk is None:
+ raise ValueError(
+ errmsg("Unterminated string starting at", s, begin))
+ end = chunk.end()
+ content, terminator = chunk.groups()
+ if content:
+ if not isinstance(content, unicode):
+ content = unicode(content, encoding)
+ _append(content)
+ if terminator == '"':
+ break
+ elif terminator != '\\':
+ if strict:
+ msg = "Invalid control character {0!r} at".format(terminator)
+ raise ValueError(errmsg(msg, s, end))
+ else:
+ _append(terminator)
+ continue
+ try:
+ esc = s[end]
+ except IndexError:
+ raise ValueError(
+ errmsg("Unterminated string starting at", s, begin))
+ if esc != 'u':
+ try:
+ m = _b[esc]
+ except KeyError:
+ msg = "Invalid \\escape: {0!r}".format(esc)
+ raise ValueError(errmsg(msg, s, end))
+ end += 1
+ else:
+ esc = s[end + 1:end + 5]
+ next_end = end + 5
+ msg = "Invalid \\uXXXX escape"
+ try:
+ if len(esc) != 4:
+ raise ValueError
+ uni = int(esc, 16)
+ if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+ msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+ if not s[end + 5:end + 7] == '\\u':
+ raise ValueError
+ esc2 = s[end + 7:end + 11]
+ if len(esc2) != 4:
+ raise ValueError
+ uni2 = int(esc2, 16)
+ uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+ next_end += 6
+ m = unichr(uni)
+ except ValueError:
+ raise ValueError(errmsg(msg, s, end))
+ end = next_end
+ _append(m)
+ return u''.join(chunks), end
+
+
+# Use speedup
+if c_scanstring is not None:
+ scanstring = c_scanstring
+else:
+ scanstring = py_scanstring
+
+def JSONString(match, context):
+ encoding = getattr(context, 'encoding', None)
+ strict = getattr(context, 'strict', True)
+ return scanstring(match.string, match.end(), encoding, strict)
+pattern(r'"')(JSONString)
+
+
+WHITESPACE = re.compile(r'\s*', FLAGS)
+
+
+def JSONObject(match, context, _w=WHITESPACE.match):
+ pairs = {}
+ s = match.string
+ end = _w(s, match.end()).end()
+ nextchar = s[end:end + 1]
+ # Trivial empty object
+ if nextchar == '}':
+ return pairs, end + 1
+ if nextchar != '"':
+ raise ValueError(errmsg("Expecting property name", s, end))
+ end += 1
+ encoding = getattr(context, 'encoding', None)
+ strict = getattr(context, 'strict', True)
+ iterscan = JSONScanner.iterscan
+ while True:
+ key, end = scanstring(s, end, encoding, strict)
+ end = _w(s, end).end()
+ if s[end:end + 1] != ':':
+ raise ValueError(errmsg("Expecting : delimiter", s, end))
+ end = _w(s, end + 1).end()
+ try:
+ value, end = iterscan(s, idx=end, context=context).next()
+ except StopIteration:
+ raise ValueError(errmsg("Expecting object", s, end))
+ pairs[key] = value
+ end = _w(s, end).end()
+ nextchar = s[end:end + 1]
+ end += 1
+ if nextchar == '}':
+ break
+ if nextchar != ',':
+ raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
+ end = _w(s, end).end()
+ nextchar = s[end:end + 1]
+ end += 1
+ if nextchar != '"':
+ raise ValueError(errmsg("Expecting property name", s, end - 1))
+ object_hook = getattr(context, 'object_hook', None)
+ if object_hook is not None:
+ pairs = object_hook(pairs)
+ return pairs, end
+pattern(r'{')(JSONObject)
+
+
+def JSONArray(match, context, _w=WHITESPACE.match):
+ values = []
+ s = match.string
+ end = _w(s, match.end()).end()
+ # Look-ahead for trivial empty array
+ nextchar = s[end:end + 1]
+ if nextchar == ']':
+ return values, end + 1
+ iterscan = JSONScanner.iterscan
+ while True:
+ try:
+ value, end = iterscan(s, idx=end, context=context).next()
+ except StopIteration:
+ raise ValueError(errmsg("Expecting object", s, end))
+ values.append(value)
+ end = _w(s, end).end()
+ nextchar = s[end:end + 1]
+ end += 1
+ if nextchar == ']':
+ break
+ if nextchar != ',':
+ raise ValueError(errmsg("Expecting , delimiter", s, end))
+ end = _w(s, end).end()
+ return values, end
+pattern(r'\[')(JSONArray)
+
+
+ANYTHING = [
+ JSONObject,
+ JSONArray,
+ JSONString,
+ JSONConstant,
+ JSONNumber,
+]
+
+JSONScanner = Scanner(ANYTHING)
+
+
+class JSONDecoder(object):
+ """Simple JSON <http://json.org> decoder
+
+ Performs the following translations in decoding by default:
+
+ +---------------+-------------------+
+ | JSON | Python |
+ +===============+===================+
+ | object | dict |
+ +---------------+-------------------+
+ | array | list |
+ +---------------+-------------------+
+ | string | unicode |
+ +---------------+-------------------+
+ | number (int) | int, long |
+ +---------------+-------------------+
+ | number (real) | float |
+ +---------------+-------------------+
+ | true | True |
+ +---------------+-------------------+
+ | false | False |
+ +---------------+-------------------+
+ | null | None |
+ +---------------+-------------------+
+
+ It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+ their corresponding ``float`` values, which is outside the JSON spec.
+ """
+
+ _scanner = Scanner(ANYTHING)
+ __all__ = ['__init__', 'decode', 'raw_decode']
+
+ def __init__(self, encoding=None, object_hook=None, parse_float=None,
+ parse_int=None, parse_constant=None, strict=True):
+ """``encoding`` determines the encoding used to interpret any ``str``
+ objects decoded by this instance (utf-8 by default). It has no
+ effect when decoding ``unicode`` objects.
+
+ Note that currently only encodings that are a superset of ASCII work;
+ strings of other encodings should be passed in as ``unicode``.
+
+ ``object_hook``, if specified, will be called with the result of
+ every JSON object decoded and its return value will be used in
+ place of the given ``dict``. This can be used to provide custom
+ deserializations (e.g. to support JSON-RPC class hinting).
+
+ ``parse_float``, if specified, will be called with the string
+ of every JSON float to be decoded. By default this is equivalent to
+ float(num_str). This can be used to use another datatype or parser
+ for JSON floats (e.g. decimal.Decimal).
+
+ ``parse_int``, if specified, will be called with the string
+ of every JSON int to be decoded. By default this is equivalent to
+ int(num_str). This can be used to use another datatype or parser
+ for JSON integers (e.g. float).
+
+ ``parse_constant``, if specified, will be called with one of the
+ following strings: -Infinity, Infinity, NaN, null, true, false.
+ This can be used to raise an exception if invalid JSON numbers
+ are encountered.
+
+ """
+ self.encoding = encoding
+ self.object_hook = object_hook
+ self.parse_float = parse_float
+ self.parse_int = parse_int
+ self.parse_constant = parse_constant
+ self.strict = strict
+
+ def decode(self, s, _w=WHITESPACE.match):
+ """
+ Return the Python representation of ``s`` (a ``str`` or ``unicode``
+ instance containing a JSON document)
+
+ """
+ obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+ end = _w(s, end).end()
+ if end != len(s):
+ raise ValueError(errmsg("Extra data", s, end, len(s)))
+ return obj
+
+ def raw_decode(self, s, **kw):
+ """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
+ with a JSON document) and return a 2-tuple of the Python
+ representation and the index in ``s`` where the document ended.
+
+ This can be used to decode a JSON document from a string that may
+ have extraneous data at the end.
+
+ """
+ kw.setdefault('context', self)
+ try:
+ obj, end = self._scanner.iterscan(s, **kw).next()
+ except StopIteration:
+ raise ValueError("No JSON object could be decoded")
+ return obj, end
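
``raw_decode()`` is what makes it possible to pull a single JSON document off the
front of a larger string and learn where it ended; ``decode()`` is ``raw_decode()``
plus a check that only whitespace follows, raising ``ValueError("Extra data...")``
otherwise. A small illustrative sketch, assuming the package is importable as
``json`` as in the docstrings above::

    >>> import json
    >>> json.JSONDecoder().raw_decode('{"a": 1} trailing text')
    ({u'a': 1}, 8)
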
@@ -0,0 +1,384 @@
+"""Implementation of JSONEncoder
+"""
+
+import re
+import math
+
+try:
+ from _json import encode_basestring_ascii as c_encode_basestring_ascii
+except ImportError:
+ c_encode_basestring_ascii = None
+
+__all__ = ['JSONEncoder']
+
+ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
+ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
+HAS_UTF8 = re.compile(r'[\x80-\xff]')
+ESCAPE_DCT = {
+ '\\': '\\\\',
+ '"': '\\"',
+ '\b': '\\b',
+ '\f': '\\f',
+ '\n': '\\n',
+ '\r': '\\r',
+ '\t': '\\t',
+}
+for i in range(0x20):
+ ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
+
+FLOAT_REPR = repr
+
+def floatstr(o, allow_nan=True):
+ # Check for specials. Note that this type of test is processor- and/or
+ # platform-specific, so do tests which don't depend on the internals.
+
+ if math.isnan(o):
+ text = 'NaN'
+ elif math.isinf(o):
+ if math.copysign(1., o) == 1.:
+ text = 'Infinity'
+ else:
+ text = '-Infinity'
+ else:
+ return FLOAT_REPR(o)
+
+ if not allow_nan:
+ msg = "Out of range float values are not JSON compliant: " + repr(o)
+ raise ValueError(msg)
+
+ return text
+
+
+def encode_basestring(s):
+ """Return a JSON representation of a Python string
+
+ """
+ def replace(match):
+ return ESCAPE_DCT[match.group(0)]
+ return '"' + ESCAPE.sub(replace, s) + '"'
+
+
+def py_encode_basestring_ascii(s):
+ if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+ s = s.decode('utf-8')
+ def replace(match):
+ s = match.group(0)
+ try:
+ return ESCAPE_DCT[s]
+ except KeyError:
+ n = ord(s)
+ if n < 0x10000:
+ return '\\u{0:04x}'.format(n)
+ else:
+ # surrogate pair
+ n -= 0x10000
+ s1 = 0xd800 | ((n >> 10) & 0x3ff)
+ s2 = 0xdc00 | (n & 0x3ff)
+ return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+ return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+
+if c_encode_basestring_ascii is not None:
+ encode_basestring_ascii = c_encode_basestring_ascii
+else:
+ encode_basestring_ascii = py_encode_basestring_ascii
+
+
+class JSONEncoder(object):
+ """Extensible JSON <http://json.org> encoder for Python data structures.
+
+ Supports the following objects and types by default:
+
+ +-------------------+---------------+
+ | Python | JSON |
+ +===================+===============+
+ | dict | object |
+ +-------------------+---------------+
+ | list, tuple | array |
+ +-------------------+---------------+
+ | str, unicode | string |
+ +-------------------+---------------+
+ | int, long, float | number |
+ +-------------------+---------------+
+ | True | true |
+ +-------------------+---------------+
+ | False | false |
+ +-------------------+---------------+
+ | None | null |
+ +-------------------+---------------+
+
+ To extend this to recognize other objects, subclass and implement a
+ ``.default()`` method that returns a serializable
+ object for ``o`` if possible; otherwise it should call the superclass
+ implementation (to raise ``TypeError``).
+
+ """
+ __all__ = ['__init__', 'default', 'encode', 'iterencode']
+ item_separator = ', '
+ key_separator = ': '
+ def __init__(self, skipkeys=False, ensure_ascii=True,
+ check_circular=True, allow_nan=True, sort_keys=False,
+ indent=None, separators=None, encoding='utf-8', default=None):
+ """Constructor for JSONEncoder, with sensible defaults.
+
+ If skipkeys is False, then it is a TypeError to attempt
+ encoding of keys that are not str, int, long, float or None. If
+ skipkeys is True, such items are simply skipped.
+
+ If ensure_ascii is True, the output is guaranteed to be str
+ objects with all incoming unicode characters escaped. If
+ ensure_ascii is False, the output will be a unicode object.
+
+ If check_circular is True, then lists, dicts, and custom encoded
+ objects will be checked for circular references during encoding to
+ prevent an infinite recursion (which would cause an OverflowError).
+ Otherwise, no such check takes place.
+
+ If allow_nan is True, then NaN, Infinity, and -Infinity will be
+ encoded as such. This behavior is not JSON specification compliant,
+ but is consistent with most JavaScript based encoders and decoders.
+ Otherwise, it will be a ValueError to encode such floats.
+
+ If sort_keys is True, then the output of dictionaries will be
+ sorted by key; this is useful for regression tests to ensure
+ that JSON serializations can be compared on a day-to-day basis.
+
+ If indent is a non-negative integer, then JSON array
+ elements and object members will be pretty-printed with that
+ indent level. An indent level of 0 will only insert newlines.
+ None is the most compact representation.
+
+ If specified, separators should be an (item_separator, key_separator)
+ tuple. The default is (', ', ': '). To get the most compact JSON
+ representation you should specify (',', ':') to eliminate whitespace.
+
+ If specified, default is a function that gets called for objects
+ that can't otherwise be serialized. It should return a JSON encodable
+ version of the object or raise a ``TypeError``.
+
+ If encoding is not None, then all input strings will be
+ transformed into unicode using that encoding prior to JSON-encoding.
+ The default is UTF-8.
+
+ """
+ self.skipkeys = skipkeys
+ self.ensure_ascii = ensure_ascii
+ self.check_circular = check_circular
+ self.allow_nan = allow_nan
+ self.sort_keys = sort_keys
+ self.indent = indent
+ self.current_indent_level = 0
+ if separators is not None:
+ self.item_separator, self.key_separator = separators
+ if default is not None:
+ self.default = default
+ self.encoding = encoding
+
+ def _newline_indent(self):
+ return '\n' + (' ' * (self.indent * self.current_indent_level))
+
+ def _iterencode_list(self, lst, markers=None):
+ if not lst:
+ yield '[]'
+ return
+ if markers is not None:
+ markerid = id(lst)
+ if markerid in markers:
+ raise ValueError("Circular reference detected")
+ markers[markerid] = lst
+ yield '['
+ if self.indent is not None:
+ self.current_indent_level += 1
+ newline_indent = self._newline_indent()
+ separator = self.item_separator + newline_indent
+ yield newline_indent
+ else:
+ newline_indent = None
+ separator = self.item_separator
+ first = True
+ for value in lst:
+ if first:
+ first = False
+ else:
+ yield separator
+ for chunk in self._iterencode(value, markers):
+ yield chunk
+ if newline_indent is not None:
+ self.current_indent_level -= 1
+ yield self._newline_indent()
+ yield ']'
+ if markers is not None:
+ del markers[markerid]
+
+ def _iterencode_dict(self, dct, markers=None):
+ if not dct:
+ yield '{}'
+ return
+ if markers is not None:
+ markerid = id(dct)
+ if markerid in markers:
+ raise ValueError("Circular reference detected")
+ markers[markerid] = dct
+ yield '{'
+ key_separator = self.key_separator
+ if self.indent is not None:
+ self.current_indent_level += 1
+ newline_indent = self._newline_indent()
+ item_separator = self.item_separator + newline_indent
+ yield newline_indent
+ else:
+ newline_indent = None
+ item_separator = self.item_separator
+ first = True
+ if self.ensure_ascii:
+ encoder = encode_basestring_ascii
+ else:
+ encoder = encode_basestring
+ allow_nan = self.allow_nan
+ if self.sort_keys:
+ keys = dct.keys()
+ keys.sort()
+ items = [(k, dct[k]) for k in keys]
+ else:
+ items = dct.iteritems()
+ _encoding = self.encoding
+ _do_decode = (_encoding is not None
+ and not (_encoding == 'utf-8'))
+ for key, value in items:
+ if isinstance(key, str):
+ if _do_decode:
+ key = key.decode(_encoding)
+ elif isinstance(key, basestring):
+ pass
+ # JavaScript is weakly typed for these, so it makes sense to
+ # also allow them. Many encoders seem to do something like this.
+ elif isinstance(key, float):
+ key = floatstr(key, allow_nan)
+ elif isinstance(key, (int, long)):
+ key = str(key)
+ elif key is True:
+ key = 'true'
+ elif key is False:
+ key = 'false'
+ elif key is None:
+ key = 'null'
+ elif self.skipkeys:
+ continue
+ else:
+ raise TypeError("key {0!r} is not a string".format(key))
+ if first:
+ first = False
+ else:
+ yield item_separator
+ yield encoder(key)
+ yield key_separator
+ for chunk in self._iterencode(value, markers):
+ yield chunk
+ if newline_indent is not None:
+ self.current_indent_level -= 1
+ yield self._newline_indent()
+ yield '}'
+ if markers is not None:
+ del markers[markerid]
+
+ def _iterencode(self, o, markers=None):
+ if isinstance(o, basestring):
+ if self.ensure_ascii:
+ encoder = encode_basestring_ascii
+ else:
+ encoder = encode_basestring
+ _encoding = self.encoding
+ if (_encoding is not None and isinstance(o, str)
+ and not (_encoding == 'utf-8')):
+ o = o.decode(_encoding)
+ yield encoder(o)
+ elif o is None:
+ yield 'null'
+ elif o is True:
+ yield 'true'
+ elif o is False:
+ yield 'false'
+ elif isinstance(o, (int, long)):
+ yield str(o)
+ elif isinstance(o, float):
+ yield floatstr(o, self.allow_nan)
+ elif isinstance(o, (list, tuple)):
+ for chunk in self._iterencode_list(o, markers):
+ yield chunk
+ elif isinstance(o, dict):
+ for chunk in self._iterencode_dict(o, markers):
+ yield chunk
+ else:
+ if markers is not None:
+ markerid = id(o)
+ if markerid in markers:
+ raise ValueError("Circular reference detected")
+ markers[markerid] = o
+ for chunk in self._iterencode_default(o, markers):
+ yield chunk
+ if markers is not None:
+ del markers[markerid]
+
+ def _iterencode_default(self, o, markers=None):
+ newobj = self.default(o)
+ return self._iterencode(newobj, markers)
+
+ def default(self, o):
+ """Implement this method in a subclass such that it returns a serializable
+ object for ``o``, or calls the base implementation (to raise a
+ ``TypeError``).
+
+ For example, to support arbitrary iterators, you could implement
+ default like this::
+
+ def default(self, o):
+ try:
+ iterable = iter(o)
+ except TypeError:
+ pass
+ else:
+ return list(iterable)
+ return JSONEncoder.default(self, o)
+
+ """
+ raise TypeError(repr(o) + " is not JSON serializable")
+
+ def encode(self, o):
+ """Return a JSON string representation of a Python data structure.
+
+ >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
+ '{"foo": ["bar", "baz"]}'
+
+ """
+ # This is for extremely simple cases and benchmarks.
+ if isinstance(o, basestring):
+ if isinstance(o, str):
+ _encoding = self.encoding
+ if (_encoding is not None
+ and not (_encoding == 'utf-8')):
+ o = o.decode(_encoding)
+ if self.ensure_ascii:
+ return encode_basestring_ascii(o)
+ else:
+ return encode_basestring(o)
+ # This doesn't pass the iterator directly to ''.join() because the
+ # exceptions aren't as detailed. The list call should be roughly
+ # equivalent to the PySequence_Fast that ''.join() would do.
+ chunks = list(self.iterencode(o))
+ return ''.join(chunks)
+
+ def iterencode(self, o):
+ """Encode the given object and yield each string representation as
+ available.
+
+ For example::
+
+ for chunk in JSONEncoder().iterencode(bigobject):
+ mysocket.write(chunk)
+
+ """
+ if self.check_circular:
+ markers = {}
+ else:
+ markers = None
+ return self._iterencode(o, markers)
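
The ``markers`` dictionary threaded through the ``_iterencode*`` methods above is
what ``check_circular`` buys: each container is registered by ``id()`` on the way
in and removed on the way out, so a self-referencing structure fails fast instead
of recursing forever. A small illustrative doctest (not part of the original file)::

    >>> from json.encoder import JSONEncoder
    >>> lst = []
    >>> lst.append(lst)          # a list that contains itself
    >>> JSONEncoder().encode(lst)
    Traceback (most recent call last):
        ...
    ValueError: Circular reference detected

With ``check_circular=False`` the same structure would instead recurse until the
interpreter gives up (the ``OverflowError`` "or worse" that the docstrings above
warn about).
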
@@ -0,0 +1,69 @@
+"""Iterator based sre token scanner
+
+"""
+
+import re
+import sre_parse
+import sre_compile
+import sre_constants
+
+from re import VERBOSE, MULTILINE, DOTALL
+from sre_constants import BRANCH, SUBPATTERN
+
+__all__ = ['Scanner', 'pattern']
+
+FLAGS = (VERBOSE | MULTILINE | DOTALL)
+
+class Scanner(object):
+ def __init__(self, lexicon, flags=FLAGS):
+ self.actions = [None]
+ # Combine phrases into a compound pattern
+ s = sre_parse.Pattern()
+ s.flags = flags
+ p = []
+ for idx, token in enumerate(lexicon):
+ phrase = token.pattern
+ try:
+ subpattern = sre_parse.SubPattern(s,
+ [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
+ except sre_constants.error:
+ raise
+ p.append(subpattern)
+ self.actions.append(token)
+
+ s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
+ p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
+ self.scanner = sre_compile.compile(p)
+
+ def iterscan(self, string, idx=0, context=None):
+ """Yield match, end_idx for each match
+
+ """
+ match = self.scanner.scanner(string, idx).match
+ actions = self.actions
+ lastend = idx
+ end = len(string)
+ while True:
+ m = match()
+ if m is None:
+ break
+ matchbegin, matchend = m.span()
+ if lastend == matchend:
+ break
+ action = actions[m.lastindex]
+ if action is not None:
+ rval, next_pos = action(m, context)
+ if next_pos is not None and next_pos != matchend:
+ # "fast forward" the scanner
+ matchend = next_pos
+ match = self.scanner.scanner(string, matchend).match
+ yield rval, matchend
+ lastend = matchend
+
+
+def pattern(pattern, flags=FLAGS):
+ def decorator(fn):
+ fn.pattern = pattern
+ fn.regex = re.compile(pattern, flags)
+ return fn
+ return decorator
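
``Scanner`` and ``pattern`` are generic: an action is any callable carrying a
``.pattern`` attribute, invoked as ``action(match, context)`` and expected to
return ``(value, next_position_or_None)``; the decoder builds its ``ANYTHING``
lexicon exactly this way. A hypothetical mini-lexer, written only to illustrate
that contract (the token functions and the input string are made up for this
sketch)::

    from json.scanner import Scanner, pattern

    @pattern(r'\d+')
    def Number(match, context):
        # return the parsed value; None means "do not fast-forward the scanner"
        return int(match.group(0)), None

    @pattern(r'[a-z]+')
    def Word(match, context):
        return match.group(0), None

    lexer = Scanner([Number, Word])
    for value, end in lexer.iterscan('abc123def'):
        print value, end        # prints: abc 3, then 123 6, then def 9
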
@@ -0,0 +1,37 @@
+r"""Command-line tool to validate and pretty-print JSON
+
+Usage::
+
+ $ echo '{"json":"obj"}' | python -mjson.tool
+ {
+ "json": "obj"
+ }
+ $ echo '{ 1.2:3.4}' | python -mjson.tool
+ Expecting property name: line 1 column 2 (char 2)
+
+"""
+import sys
+import json
+
+def main():
+ if len(sys.argv) == 1:
+ infile = sys.stdin
+ outfile = sys.stdout
+ elif len(sys.argv) == 2:
+ infile = open(sys.argv[1], 'rb')
+ outfile = sys.stdout
+ elif len(sys.argv) == 3:
+ infile = open(sys.argv[1], 'rb')
+ outfile = open(sys.argv[2], 'wb')
+ else:
+ raise SystemExit("{0} [infile [outfile]]".format(sys.argv[0]))
+ try:
+ obj = json.load(infile)
+ except ValueError, e:
+ raise SystemExit(e)
+ json.dump(obj, outfile, sort_keys=True, indent=4)
+ outfile.write('\n')
+
+
+if __name__ == '__main__':
+ main()
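
Besides the pipe form shown in the docstring, ``main()`` also accepts one or two
positional arguments (an input file and an optional output file). A usage sketch
with made-up filenames::

    $ python -mjson.tool messy.json
    $ python -mjson.tool messy.json pretty.json
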
@@ -0,0 +1,42 @@
+= bfiles - manage large binary files =
+This extension is based on Greg Ward's bfiles extension, which can be found
+at http://mercurial.selenic.com/wiki/BfilesExtension.
+
+== The Bfile Store ==
+
+Bfile stores are simply directories where each file is a bfile. The filename
+is the SHA-1 hash of the bfile. No other path structure is needed, because all
+interactions with the store have one of these forms:
+
+- Download a bfile with this hash
+- Upload a bfile with this hash
+- Check if the store has a bfile with this hash
+
+== The Local Repository ==
+
+The local repository has a bfile store in .hg/bfiles which holds a subset of the
+bfiles needed. On a clone only the bfiles at tip are downloaded. When bfiles are
+downloaded from the central store a copy is saved in this store.
+
+== The Global Cache ==
+
+Bfiles in a local repository store are hard linked to files in the global cache. Before
+a file is downloaded we check if it is in the global cache.
+
+== Implementation Details ==
+
+Each bfile has a standin which is in .hgbfiles. The standin is tracked by Mercurial.
+The contents of the standin are the SHA-1 hash of the bfile. When a bfile is added/removed/
+copied/renamed/etc., the same operation is applied to the standin. Thus the history of the
+standin is the history of the bfile.
+
+For performance reasons the contents of a standin are only updated before a commit.
+Standins are added/removed/copied/renamed by the add/remove/copy/rename Mercurial
+commands, but their contents are not updated at that point. The contents of a standin will
+always be the hash of the bfile as of the last commit. To support some commands (revert), some
+standins are temporarily updated but are changed back after the command is finished.
+
+A Mercurial dirstate object tracks the state of the bfiles. The dirstate uses the
+last modified time and current size to detect if a file has changed (without reading
+the entire contents of the file).
+
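
Since a standin is nothing more than the SHA-1 of the big file's contents, the
relationship between working-copy file, standin, and store entry can be pictured
in a few lines of Python. This is only a sketch of the scheme described above,
not the extension's actual helper code (which lives in bfutil and is not shown in
this changeset); hashlib is assumed here, where the real code may use Mercurial's
own wrappers:

    import hashlib

    def standin_contents(path):
        # The standin tracked in .hgbfiles holds the SHA-1 hex digest of the
        # big file (plus a newline), and the central store keeps the file
        # itself under a name equal to that digest.
        with open(path, 'rb') as f:
            digest = hashlib.sha1(f.read()).hexdigest()
        return digest + '\n'
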
@@ -0,0 +1,32 @@
+'''track large binary files
+
+Large binary files tend to be not very compressible, not very "diffable",
+and not at all mergeable. Such files are not handled well by Mercurial\'s
+storage format (revlog), which is based on compressed binary deltas.
+bfiles solves this problem by adding a centralized client-server layer on
+top of Mercurial: big files live in a *central store* out on the network
+somewhere, and you only fetch the big files that you need when you need
+them.
+
+bfiles works by maintaining a *standin* in .hgbfiles/ for each big file.
+The standins are small (41 bytes: an SHA-1 hash plus newline) and are
+tracked by Mercurial. Big file revisions are identified by the SHA-1 hash
+of their contents, which is written to the standin. bfiles uses that
+revision ID to get/put big file revisions from/to the central store.
+
+A complete tutorial for using bfiles is included in ``usage.txt`` in the
+bfiles source distribution. See
+http://vc.gerg.ca/hg/hg-bfiles/raw-file/tip/usage.txt for the latest
+version.
+'''
+
+from mercurial import commands
+import bfsetup
+import bfcommands
+
+reposetup = bfsetup.reposetup
+uisetup = bfsetup.uisetup
+
+commands.norepo += " kbfconvert"
+
+cmdtable = bfcommands.cmdtable
@@ -0,0 +1,178 @@
+'''Base class for store implementations and store-related utility code.'''
+
+import sys
+import os
+import tempfile
+import binascii
+import bfutil
+import shutil
+
+from mercurial import util, node, error, url as url_, hg
+from mercurial.i18n import _
+
+class StoreError(Exception):
+ '''Raised when there is a problem getting files from or putting
+ files to a central store.'''
+ def __init__(self, filename, hash, url, detail):
+ self.filename = filename
+ self.hash = hash
+ self.url = url
+ self.detail = detail
+
+ def longmessage(self):
+ return ("%s: %s\n"
+ "(failed URL: %s)\n"
+ % (self.filename, self.detail, self.url))
+
+ def __str__(self):
+ return "%s: %s" % (self.url, self.detail)
+
+class basestore(object):
+ def __init__(self, ui, repo, url):
+ self.ui = ui
+ self.repo = repo
+ self.url = url
+
+ def put(self, source, hash):
+ '''Put source file into the store under <filename>/<hash>.'''
+ raise NotImplementedError('abstract method')
+
+ def get(self, files):
+ '''Get the specified big files from the store and write to local
+ files under repo.root. files is a list of (filename, hash)
+ tuples. Return (success, missing), lists of files successfully
+ downloaded and those not found in the store. success is a list
+ of (filename, hash) tuples; missing is a list of filenames that
+ we could not get. (The detailed error message will already have
+ been presented to the user, so missing is just supplied as a
+ summary.)'''
+ success = []
+ missing = []
+ ui = self.ui
+
+ at = 0
+ for filename, hash in files:
+ ui.progress(_('Getting kbfiles'), at, unit='kbfile', total=len(files))
+ at += 1
+ ui.note(_('getting %s\n') % filename)
+ outfilename = self.repo.wjoin(filename)
+ destdir = os.path.dirname(outfilename)
+ util.makedirs(destdir)
+ if not os.path.isdir(destdir):
+ self.abort(error.RepoError(_('cannot create dest directory %s') % destdir))
+
+ # No need to pass mode='wb' to fdopen(), since mkstemp() already
+ # opened the file in binary mode.
+ (tmpfd, tmpfilename) = tempfile.mkstemp(
+ dir=destdir, prefix=os.path.basename(filename))
+ tmpfile = os.fdopen(tmpfd, 'w')
+
+ try:
+ bhash = self._getfile(tmpfile, filename, hash)
+ except StoreError, err:
+ ui.warn(err.longmessage())
+ os.remove(tmpfilename)
+ missing.append(filename)
+ continue
+
+ hhash = binascii.hexlify(bhash)
+ if hhash != hash:
+ ui.warn(_('%s: data corruption (expected %s, got %s)\n')
+ % (filename, hash, hhash))
+ os.remove(tmpfilename)
+ missing.append(filename)
+ else:
+ if os.path.exists(outfilename): # for windows
+ os.remove(outfilename)
+ os.rename(tmpfilename, outfilename)
+ bfutil.copy_to_cache(self.repo, self.repo['.'].node(), filename, True)
+ success.append((filename, hhash))
+
+ ui.progress(_('Getting kbfiles'), None)
+ return (success, missing)
+
+ def verify(self, revs, contents=False):
+ '''Verify the existence (and, optionally, contents) of every big
+ file revision referenced by every changeset in revs.
+ Return 0 if all is well, non-zero on any errors.'''
+ write = self.ui.write
+ failed = False
+
+ write(_('searching %d changesets for big files\n') % len(revs))
+ verified = set() # set of (filename, filenode) tuples
+
+ for rev in revs:
+ cctx = self.repo[rev]
+ cset = "%d:%s" % (cctx.rev(), node.short(cctx.node()))
+
+ for standin in cctx:
+ failed = (self._verifyfile(cctx,
+ cset,
+ contents,
+ standin,
+ verified)
+ or failed)
+
+ num_revs = len(verified)
+ num_bfiles = len(set([fname for (fname, fnode) in verified]))
+ if contents:
+ write(_('verified contents of %d revisions of %d big files\n')
+ % (num_revs, num_bfiles))
+ else:
+ write(_('verified existence of %d revisions of %d big files\n')
+ % (num_revs, num_bfiles))
+
+ return int(failed)
+
+ def _getfile(self, tmpfile, filename, hash):
+ '''Fetch one revision of one file from the store and write it
+ to tmpfile. Compute the hash of the file on-the-fly as it
+ downloads and return the binary hash. Close tmpfile. Raise
+ StoreError if unable to download the file (e.g. it does not
+ exist in the store).'''
+ raise NotImplementedError('abstract method')
+
+ def _verifyfile(self, cctx, cset, contents, standin, verified):
+ '''Perform the actual verification of a file in the store.
+ '''
+ raise NotImplementedError('abstract method')
+
+import localstore, httpstore
+
+_store_provider = {
+ 'file': (localstore, 'localstore'),
+ 'http': (httpstore, 'httpstore'),
+ 'https': (httpstore, 'httpstore'),
+ }
+
+# During clone this function is passed the src's ui object
+# but it needs the dest's ui object so it can read out of
+# the config file. Use repo.ui instead.
+def _open_store(repo, path=None, put=False):
+ ui = repo.ui
+ if not path:
+ path = ui.expandpath('default-push', 'default')
+ # If 'default-push' and 'default' can't be expanded
+ # they are just returned. In that case use the empty string, which
+ # falls back to the file scheme.
+ if path == 'default-push' or path == 'default':
+ path = ''
+
+ # The path could be a scheme so use Mercurial's normal functionality
+ # to resolve the scheme to a repository and use its path
+ if path:
+ path = hg.repository(ui, path).path
+
+ match = url_.scheme_re.match(path)
+ if not match: # regular filesystem path
+ scheme = 'file'
+ else:
+ scheme = match.group(1)
+
+ try:
+ (mod, klass) = _store_provider[scheme]
+ except KeyError:
+ raise util.Abort(_('unsupported URL scheme %r') % scheme)
+
+ klass = getattr(mod, klass)
+ return klass(ui, repo, path)
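
``basestore`` defines the contract a concrete store must satisfy: ``put()`` uploads
one file, ``_getfile()`` writes one revision into a temporary file and returns its
binary hash, and ``_verifyfile()`` checks a single standin. ``localstore`` and
``httpstore`` (registered in ``_store_provider`` above) implement it; the
do-nothing subclass below is only a sketch of that required surface, not part of
the extension:

    import basestore

    class nullstore(basestore.basestore):
        '''Hypothetical store that holds nothing; shows the abstract methods.'''

        def put(self, source, hash):
            raise basestore.StoreError(source, hash, self.url,
                                       'null store cannot accept files')

        def _getfile(self, tmpfile, filename, hash):
            tmpfile.close()
            raise basestore.StoreError(filename, hash, self.url,
                                       'null store has no files')

        def _verifyfile(self, cctx, cset, contents, standin, verified):
            # nothing is stored, so nothing to verify; report no failure
            return False
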
@@ -0,0 +1,520 @@
+'''High-level command functions: bfadd() et al., plus the cmdtable.'''
+
+import os
+import re
+import errno
+import binascii
+import shutil
+import httplib
+import posixpath
+import BaseHTTPServer
+
+from mercurial import util, commands, match as match_, hg, node, context, error
+from mercurial.i18n import _
+
+import bfutil, basestore
+
+# -- Commands ----------------------------------------------------------
+
+def bfconvert(ui, src, dest, *pats, **opts):
+ '''Convert a repository to a repository using bfiles
+
+ Convert the source repository, creating an identical
+ repository except that all files that match the
+ given patterns, or are over a given size, will
+ be added as bfiles. The size of a file is the size of the
+ first version of the file. After running this command you
+ will need to set the store and then run bfput on the new
+ repository to upload the bfiles to the central store.
+ '''
+
+ if opts['tonormal']:
+ tobfile = False
+ else:
+ tobfile = True
+ size = opts['size']
+ if not size:
+ size = ui.config(bfutil.long_name, 'size', default=None)
+ try:
+ size = int(size)
+ except ValueError:
+ raise util.Abort(_('bfiles.size must be integer, was %s\n') % size)
+ except TypeError:
+ raise util.Abort(_('size must be specified'))
+
+ try:
+ rsrc = hg.repository(ui, src)
+ if not rsrc.local():
+ raise util.Abort(_('%s is not a local Mercurial repo') % src)
+ except error.RepoError, err:
+ ui.traceback()
+ raise util.Abort(err.args[0])
+ if os.path.exists(dest):
+ if not os.path.isdir(dest):
+ raise util.Abort(_('destination %s already exists') % dest)
+ elif os.listdir(dest):
+ raise util.Abort(_('destination %s is not empty') % dest)
+ try:
+ ui.status(_('initializing destination %s\n') % dest)
+ rdst = hg.repository(ui, dest, create=True)
+ if not rdst.local():
+ raise util.Abort(_('%s is not a local Mercurial repo') % dest)
+ except error.RepoError:
+ ui.traceback()
+ raise util.Abort(_('%s is not a repo') % dest)
+
+ try:
+ # Lock destination to prevent modification while it is converted to.
+ # Don't need to lock src because we are just reading from its history
+ # which can't change.
+ dst_lock = rdst.lock()
+
+ # Get a list of all changesets in the source. The easy way to do this
+ # is to simply walk the changelog, using changelog.nodesbetween().
+ # Take a look at mercurial/revlog.py:639 for more details.
+ # Use a generator instead of a list to decrease memory usage
+ ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0])
+ revmap = {node.nullid: node.nullid}
+ if tobfile:
+ bfiles = set()
+ normalfiles = set()
+ if not pats:
+ pats = ui.config(bfutil.long_name, 'patterns', default=())
+ if pats:
+ pats = pats.split(' ')
+ if pats:
+ matcher = match_.match(rsrc.root, '', list(pats))
+ else:
+ matcher = None
+
+ bfiletohash = {}
+ for ctx in ctxs:
+ ui.progress(_('Converting revisions'), ctx.rev(), unit=_('revision'), total=rsrc['tip'].rev())
+ _bfconvert_addchangeset(rsrc, rdst, ctx, revmap,
+ bfiles, normalfiles, matcher, size, bfiletohash)
+ ui.progress(_('Converting revisions'), None)
+
+ if os.path.exists(rdst.wjoin(bfutil.short_name)):
+ shutil.rmtree(rdst.wjoin(bfutil.short_name))
+
+ for f in bfiletohash.keys():
+ if os.path.isfile(rdst.wjoin(f)):
+ os.unlink(rdst.wjoin(f))
+ try:
+ os.removedirs(os.path.dirname(rdst.wjoin(f)))
+ except:
+ pass
+
+ else:
+ for ctx in ctxs:
+ ui.progress(_('Converting revisions'), ctx.rev(), unit=_('revision'), total=rsrc['tip'].rev())
+ _addchangeset(ui, rsrc, rdst, ctx, revmap)
+
+ ui.progress(_('Converting revisions'), None)
+ except:
+ # we failed, remove the new directory
+ shutil.rmtree(rdst.root)
+ raise
+ finally:
+ dst_lock.release()
+
+def _addchangeset(ui, rsrc, rdst, ctx, revmap):
+ # Convert src parents to dst parents
+ parents = []
+ for p in ctx.parents():
+ parents.append(revmap[p.node()])
+ while len(parents) < 2:
+ parents.append(node.nullid)
+
+ # Generate list of changed files
+ files = set(ctx.files())
+ if node.nullid not in parents:
+ mc = ctx.manifest()
+ mp1 = ctx.parents()[0].manifest()
+ mp2 = ctx.parents()[1].manifest()
+ for f in mp1:
+ if f not in mc:
+ files.add(f)
+ for f in mp2:
+ if f not in mc:
+ files.add(f)
+ for f in mc:
+ if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+ files.add(f)
+
+ def getfilectx(repo, memctx, f):
+ if bfutil.standin(f) in files:
+ # if the file isn't in the manifest then it was removed
+ # or renamed, raise IOError to indicate this
+ try:
+ fctx = ctx.filectx(bfutil.standin(f))
+ except error.LookupError:
+ raise IOError()
+ renamed = fctx.renamed()
+ if renamed:
+ renamed = bfutil.split_standin(renamed[0])
+
+ hash = fctx.data().strip()
+ path = bfutil.find_file(rsrc, hash)
+ data = ''
+ with open(path, 'rb') as fd:
+ data = fd.read()
+ return context.memfilectx(f, data, 'l' in fctx.flags(),
+ 'x' in fctx.flags(), renamed)
+ else:
+ try:
+ fctx = ctx.filectx(f)
+ except error.LookupError:
+ raise IOError()
+ renamed = fctx.renamed()
+ if renamed:
+ renamed = renamed[0]
+ data = fctx.data()
+ if f == '.hgtags':
+ newdata = []
+ for line in data.splitlines():
+ id, name = line.split(' ', 1)
+ newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]), name))
+ data = ''.join(newdata)
+ return context.memfilectx(f, data, 'l' in fctx.flags(),
+ 'x' in fctx.flags(), renamed)
+
+ dstfiles = []
+ for file in files:
+ if bfutil.is_standin(file):
+ dstfiles.append(bfutil.split_standin(file))
+ else:
+ dstfiles.append(file)
+ # Commit
+ mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
+ getfilectx, ctx.user(), ctx.date(), ctx.extra())
+ ret = rdst.commitctx(mctx)
+ rdst.dirstate.setparents(ret)
+ revmap[ctx.node()] = rdst.changelog.tip()
+
+def _bfconvert_addchangeset(rsrc, rdst, ctx, revmap, bfiles, normalfiles, matcher, size, bfiletohash):
+ # Convert src parents to dst parents
+ parents = []
+ for p in ctx.parents():
+ parents.append(revmap[p.node()])
+ while len(parents) < 2:
+ parents.append(node.nullid)
+
+ # Generate list of changed files
+ files = set(ctx.files())
+ if node.nullid not in parents:
+ mc = ctx.manifest()
+ mp1 = ctx.parents()[0].manifest()
+ mp2 = ctx.parents()[1].manifest()
+ for f in mp1:
+ if f not in mc:
+ files.add(f)
+ for f in mp2:
+ if f not in mc:
+ files.add(f)
+ for f in mc:
+ if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+ files.add(f)
+
+ dstfiles = []
+ for f in files:
+ if f not in bfiles and f not in normalfiles:
+ isbfile = _is_bfile(f, ctx, matcher, size)
+ # If this file was renamed or copied then copy
+ # the bfileness of its predecessor
+ if f in ctx.manifest():
+ fctx = ctx.filectx(f)
+ renamed = fctx.renamed()
+ renamedbfile = renamed and renamed[0] in bfiles
+ isbfile |= renamedbfile
+ if 'l' in fctx.flags():
+ if renamedbfile:
+ raise util.Abort(_('Renamed/copied bfile %s becomes symlink') % f)
+ isbfile = False
+ if isbfile:
+ bfiles.add(f)
+ else:
+ normalfiles.add(f)
+
+ if f in bfiles:
+ dstfiles.append(bfutil.standin(f))
+ # bfile in manifest if it has not been removed/renamed
+ if f in ctx.manifest():
+ if 'l' in ctx.filectx(f).flags():
+ if renamed and renamed[0] in bfiles:
+ raise util.Abort(_('bfile %s becomes symlink') % f)
+
+ # bfile was modified, update standins
+ fullpath = rdst.wjoin(f)
+ bfutil.create_dir(os.path.dirname(fullpath))
+ m = util.sha1('')
+ m.update(ctx[f].data())
+ hash = m.hexdigest()
+ if f not in bfiletohash or bfiletohash[f] != hash:
+ with open(fullpath, 'wb') as fd:
+ fd.write(ctx[f].data())
+ executable = 'x' in ctx[f].flags()
+ os.chmod(fullpath, bfutil.get_mode(executable))
+ bfutil.write_standin(rdst, bfutil.standin(f), hash, executable)
+ bfiletohash[f] = hash
+ else:
+ # normal file
+ dstfiles.append(f)
+
+ def getfilectx(repo, memctx, f):
+ if bfutil.is_standin(f):
+ # if the file isn't in the manifest then it was removed
+ # or renamed, raise IOError to indicate this
+ srcfname = bfutil.split_standin(f)
+ try:
+ fctx = ctx.filectx(srcfname)
+ except error.LookupError:
+ raise IOError()
+ renamed = fctx.renamed()
+ if renamed:
+ # standin is always a bfile because bfileness
+ # doesn't change after rename or copy
+ renamed = bfutil.standin(renamed[0])
+
+ return context.memfilectx(f, bfiletohash[srcfname], 'l' in fctx.flags(),
+ 'x' in fctx.flags(), renamed)
+ else:
+ try:
+ fctx = ctx.filectx(f)
+ except error.LookupError:
+ raise IOError()
+ renamed = fctx.renamed()
+ if renamed:
+ renamed = renamed[0]
+
+ data = fctx.data()
+ if f == '.hgtags':
+ newdata = []
+ for line in data.splitlines():
+ id, name = line.split(' ', 1)
+ newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]), name))
+ data = ''.join(newdata)
+ return context.memfilectx(f, data, 'l' in fctx.flags(),
+ 'x' in fctx.flags(), renamed)
+
+ # Commit
+ mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
+ getfilectx, ctx.user(), ctx.date(), ctx.extra())
+ ret = rdst.commitctx(mctx)
+ rdst.dirstate.setparents(ret)
+ revmap[ctx.node()] = rdst.changelog.tip()
+
+def _is_bfile(file, ctx, matcher, size):
+ '''
+ A file is a bfile if it matches a pattern or is over
+ the given size.
+ '''
+ # Never store .hgtags, .hgignore, or .hgsigs as bfiles
+ if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
+ return False
+ if matcher and matcher(file):
+ return True
+ try:
+ return ctx.filectx(file).size() >= size * 1024 * 1024
+ except error.LookupError:
+ return False
+
+def upload_bfiles(ui, rsrc, rdst, files):
+ '''upload big files to the central store'''
+
+ if not files:
+ return
+
+ # Don't upload locally. All bfiles are in the system wide cache
+ # so the other repo can just get them from there.
+ if not rdst.path.startswith('http'):
+ return
+
+ store = basestore._open_store(rsrc, rdst.path, put=True)
+
+ at = 0
+ for hash in files:
+ ui.progress(_('Uploading bfiles'), at, unit='bfile', total=len(files))
+ at += 1
+ source = bfutil.find_file(rsrc, hash, False)
+ if not source:
+ raise util.Abort(_('Missing bfile %s needs to be uploaded') % hash)
+ # XXX check for errors here
+ store.put(source, hash)
+ ui.progress(_('Uploading bfiles'), None)
+
+def verify_bfiles(ui, repo, all=False, contents=False):
+ '''Verify that every big file revision in the current changeset
+ exists in the central store. With --contents, also verify that
+ the contents of each big file revision are correct (SHA-1 hash
+ matches the revision ID). With --all, check every changeset in
+ this repository.'''
+ if all:
+ # Pass a list to the function rather than an iterator because we know a list will work.
+ revs = range(len(repo))
+ else:
+ revs = ['.']
+
+ store = basestore._open_store(repo)
+ return store.verify(revs, contents=contents)
+
+def revert_bfiles(ui, repo):
+ wlock = repo.wlock()
+ try:
+ bfdirstate = bfutil.open_bfdirstate(ui, repo)
+ s = bfdirstate.status(match_.always(repo.root, repo.getcwd()), [], False, False, False)
+ (unsure, modified, added, removed, missing, unknown, ignored, clean) = s
+
+ bfiles = bfutil.list_bfiles(repo)
+ toget = []
+ at = 0
+ updated = 0
+ for bfile in bfiles:
+ if not os.path.exists(repo.wjoin(bfutil.standin(bfile))):
+ bfdirstate.remove(bfile)
+ continue
+ if os.path.exists(repo.wjoin(bfutil.standin(os.path.join(bfile + '.orig')))):
+ shutil.copyfile(repo.wjoin(bfile), repo.wjoin(bfile + '.orig'))
+ at += 1
+ expectedhash = repo[None][bfutil.standin(bfile)].data().strip()
+ mode = os.stat(repo.wjoin(bfutil.standin(bfile))).st_mode
+ if not os.path.exists(repo.wjoin(bfile)) or expectedhash != bfutil.hashfile(repo.wjoin(bfile)):
+ path = bfutil.find_file(repo, expectedhash, False)
+ if path is None:
+ toget.append((bfile, expectedhash))
+ else:
+ util.makedirs(os.path.dirname(repo.wjoin(bfile)))
+ shutil.copy(path, repo.wjoin(bfile))
+ os.chmod(repo.wjoin(bfile), mode)
+ updated += 1
+ if bfutil.standin(bfile) not in repo['.']:
+ bfdirstate.add(bfutil.unixpath(bfile))
+ elif expectedhash == repo['.'][bfutil.standin(bfile)].data().strip():
+ bfdirstate.normal(bfutil.unixpath(bfile))
+ else:
+ bfutil.dirstate_normaldirty(bfdirstate, bfutil.unixpath(bfile))
+ elif os.path.exists(repo.wjoin(bfile)) and mode != os.stat(repo.wjoin(bfile)).st_mode:
+ os.chmod(repo.wjoin(bfile), mode)
+ updated += 1
+ if bfutil.standin(bfile) not in repo['.']:
+ bfdirstate.add(bfutil.unixpath(bfile))
+ elif expectedhash == repo['.'][bfutil.standin(bfile)].data().strip():
+ bfdirstate.normal(bfutil.unixpath(bfile))
+ else:
+ bfutil.dirstate_normaldirty(bfdirstate, bfutil.unixpath(bfile))
+
+ if toget:
+ store = basestore._open_store(repo)
+ (success, missing) = store.get(toget)
+ else:
+ success, missing = [], []
+
+ for (filename, hash) in success:
+ mode = os.stat(repo.wjoin(bfutil.standin(filename))).st_mode
+ os.chmod(repo.wjoin(filename), mode)
+ updated += 1
+ if bfutil.standin(filename) not in repo['.']:
+ bfdirstate.add(bfutil.unixpath(filename))
+ elif hash == repo['.'][bfutil.standin(filename)].data().strip():
+ bfdirstate.normal(bfutil.unixpath(filename))
+ else:
+ bfutil.dirstate_normaldirty(bfdirstate, bfutil.unixpath(filename))
+
+ removed = 0
+ for bfile in bfdirstate:
+ if not os.path.exists(repo.wjoin(bfutil.standin(bfile))):
+ if os.path.exists(repo.wjoin(bfile)):
+ os.unlink(repo.wjoin(bfile))
+ removed += 1
+ if bfutil.standin(bfile) in repo['.']:
+ bfdirstate.remove(bfutil.unixpath(bfile))
+ else:
+ bfdirstate.forget(bfutil.unixpath(bfile))
+ else:
+ state = repo.dirstate[bfutil.standin(bfile)]
+ if state == 'n':
+ bfdirstate.normal(bfile)
+ elif state == 'r':
+ bfdirstate.remove(bfile)
+ elif state == 'a':
+ bfdirstate.add(bfile)
+ elif state == '?':
+ bfdirstate.forget(bfile)
+ bfdirstate.write()
+ finally:
+ wlock.release()
+
+def update_bfiles(ui, repo):
+ wlock = repo.wlock()
+ try:
+ bfdirstate = bfutil.open_bfdirstate(ui, repo)
+ s = bfdirstate.status(match_.always(repo.root, repo.getcwd()), [], False, False, False)
+ (unsure, modified, added, removed, missing, unknown, ignored, clean) = s
+
+ bfiles = bfutil.list_bfiles(repo)
+ toget = []
+ at = 0
+ updated = 0
+ printed = False
+ if bfiles:
+ ui.status(_('Getting changed bfiles\n'))
+ printed = True
+
+ for bfile in bfiles:
+ at += 1
+ expectedhash = repo[None][bfutil.standin(bfile)].data().strip()
+ mode = os.stat(repo.wjoin(bfutil.standin(bfile))).st_mode
+ if not os.path.exists(repo.wjoin(bfile)) or expectedhash != bfutil.hashfile(repo.wjoin(bfile)):
+ path = bfutil.find_file(repo, expectedhash, False)
+ if not path:
+ toget.append((bfile, expectedhash))
+ else:
+ util.makedirs(os.path.dirname(repo.wjoin(bfile)))
+ shutil.copy(path, repo.wjoin(bfile))
+ os.chmod(repo.wjoin(bfile), mode)
+ updated += 1
+ bfdirstate.normal(bfutil.unixpath(bfile))
+ elif os.path.exists(repo.wjoin(bfile)) and mode != os.stat(repo.wjoin(bfile)).st_mode:
+ os.chmod(repo.wjoin(bfile), mode)
+ updated += 1
+ bfdirstate.normal(bfutil.unixpath(bfile))
+
+ if toget:
+ store = basestore._open_store(repo)
+ (success, missing) = store.get(toget)
+ else:
+ success, missing = [],[]
+
+ for (filename, hash) in success:
+ mode = os.stat(repo.wjoin(bfutil.standin(filename))).st_mode
+ os.chmod(repo.wjoin(filename), mode)
+ updated += 1
+ bfdirstate.normal(bfutil.unixpath(filename))
+
+ removed = 0
+ for bfile in bfdirstate:
+ if bfile not in bfiles:
+ if os.path.exists(repo.wjoin(bfile)):
+ if not printed:
+ ui.status(_('Getting changed bfiles\n'))
+ printed = True
+ os.unlink(repo.wjoin(bfile))
+ removed += 1
+ bfdirstate.forget(bfutil.unixpath(bfile))
+
+ bfdirstate.write()
+ if printed:
+ ui.status(_('%d big files updated, %d removed\n') % (updated, removed))
+ finally:
+ wlock.release()
+
+# -- hg commands declarations ------------------------------------------------
+
+
+cmdtable = {
+ 'kbfconvert': (bfconvert,
+ [('s', 'size', 0, 'All files over this size '
+ '(in megabytes) will be considered bfiles. This can also be specified in your hgrc as [bfiles].size.'),
+ ('','tonormal',False, 'Convert from a bfiles repo to a normal repo')],
+ _('hg kbfconvert SOURCE DEST [FILE ...]')),
+ }
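+
+# For orientation: the size option above is given in megabytes. In the add
+# override in bfsetup the corresponding [bfiles].size value is applied as a
+# plain byte threshold; a minimal sketch of that check (the helper name below
+# is illustrative only, not part of the extension):
+#
+#     import os
+#
+#     def over_bfile_threshold(path, size_mb):
+#         # files of at least size_mb megabytes become bfile candidates
+#         return os.path.getsize(path) >= int(size_mb) * 1024 * 1024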
|
|
|
@@ -0,0 +1,1016 @@ + '''Setup code for bfiles extension: reposetup(), uisetup().'''
+
+import os
+import types
+import copy
+
+from mercurial import hg, extensions, commands, util, context, cmdutil, \
+ match as match_, filemerge, node, archival, httprepo, error
+from mercurial.i18n import _
+from mercurial.node import hex
+import bfutil, bfcommands
+
+def hgversion():
+ from mercurial.__version__ import version
+ return [int(n) for n in version.partition('+')[0].split('.')]
+hgversion = hgversion()
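+# For example, a Mercurial version string of '1.7.3+20110101' parses to
+# [1, 7, 3]; any '+local' build suffix is dropped before splitting on '.'.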
+
+# -- Wrappers: modify existing commands --------------------------------
+
+def reposetup(ui, repo):
+ # add a kbfiles-specific querystring argument to remote requests, so kiln can reject
+ # operations on a kbfiles-enabled remote repo from a non-kbfiles local repo.
+ if issubclass(repo.__class__, httprepo.httprepository):
+ class kbfilesrepo(repo.__class__):
+ # The function we want to override is do_cmd for Mercurial <= 1.6
+ # and _callstream for Mercurial > 1.6. Wrap whichever one we can find.
+ if hasattr(repo.__class__, 'do_cmd'):
+ def do_cmd(self, cmd, **args):
+ args['kbfiles'] = 'true'
+ return super(repo.__class__, self).do_cmd(cmd, **args)
+ if hasattr(repo.__class__, '_callstream'):
+ def _callstream(self, cmd, **args):
+ args['kbfiles'] = 'true'
+ return super(repo.__class__, self)._callstream(cmd, **args)
+ repo.__class__ = kbfilesrepo
+
+ # bfiles doesn't support non-local repositories -- get out quick in
+ # such a case
+ if not repo.local():
+ return
+
+ for name in ('status', 'commitctx', 'commit', 'push'):
+ method = getattr(repo, name)
+ #if not (isinstance(method, types.MethodType) and
+ # method.im_func is repo.__class__.commitctx.im_func):
+ if (isinstance(method, types.FunctionType) and
+ method.func_name == 'wrap'):
+ ui.warn(_('kbfiles: repo method %r appears to have already been '
+ 'wrapped by another extension: '
+ 'kbfiles may behave incorrectly\n')
+ % name)
+
+ class bfiles_repo(repo.__class__):
+ bfstatus = False
+ def status_nobfiles(self, *args, **kwargs):
+ return super(bfiles_repo, self).status(*args, **kwargs)
+
+ # Figure out the status of big files and insert them into the
+ # appropriate list in the result. Also removes standin files from
+ # the listing. This function reverts to the original status if
+ # self.bfstatus is False
+ def status(self, node1='.', node2=None, match=None, ignored=False, clean=False, unknown=False, subrepos=None):
+ listignored, listclean, listunknown = ignored, clean, unknown
+ if not self.bfstatus:
+ try:
+ return super(bfiles_repo, self).status(node1, node2, match, listignored, listclean, listunknown, subrepos)
+ except TypeError:
+ return super(bfiles_repo, self).status(node1, node2, match, listignored, listclean, listunknown)
+ else:
+ # some calls in this function rely on the old version of status
+ self.bfstatus = False
+ if isinstance(node1, context.changectx):
+ ctx1 = node1
+ else:
+ ctx1 = repo[node1]
+ if isinstance(node2, context.changectx):
+ ctx2 = node2
+ else:
+ ctx2 = repo[node2]
+ working = ctx2.rev() is None
+ parentworking = working and ctx1 == self['.']
+
+ def inctx(file, ctx):
+ try:
+ if ctx.rev() is None:
+ return file in ctx.manifest()
+ ctx[file]
+ return True
+ except:
+ return False
+
+ # create a copy of match that matches standins instead of bfiles
+ # if matcher not set then it is the always matcher so overwrite that
+ if match is None:
+ match = match_.always(self.root, self.getcwd())
+
+ def tostandin(file):
+ if inctx(bfutil.standin(file), ctx2):
+ return bfutil.standin(file)
+ return file
+
+ m = copy.copy(match)
+ m._files = [tostandin(f) for f in m._files]
+ orig_matchfn = m.matchfn
+ def matchfn(f):
+ if bfutil.is_standin(f):
+ return orig_matchfn(bfutil.split_standin(f))
+ else:
+ return orig_matchfn(f) and not inctx(bfutil.standin(f), ctx2)
+ m.matchfn = matchfn
+ # get ignored clean and unknown but remove them later if they were not asked for
+ try:
+ result = super(bfiles_repo, self).status(node1, node2, m, True, True, True, subrepos)
+ except TypeError:
+ result = super(bfiles_repo, self).status(node1, node2, m, True, True, True)
+ if working:
+ # Hold the wlock while we read bfiles and update the bfdirstate
+ wlock = repo.wlock()
+ try:
+ # Any non bfiles that were explicitly listed must be taken out or
+ # bfdirstate.status will report an error. The status of these files
+ # was already computed using super's status.
+ bfdirstate = bfutil.open_bfdirstate(ui, self)
+ match._files = [f for f in match._files if f in bfdirstate]
+ s = bfdirstate.status(match, [], True, True, True)
+ (unsure, modified, added, removed, missing, unknown, ignored, clean) = s
+ if parentworking:
+ for bfile in unsure:
+ if ctx1[bfutil.standin(bfile)].data().strip() != bfutil.hashfile(self.wjoin(bfile)):
+ modified.append(bfile)
+ else:
+ clean.append(bfile)
+ bfdirstate.normal(bfutil.unixpath(bfile))
+ bfdirstate.write()
+ else:
+ tocheck = unsure + modified + added + clean
+ modified, added, clean = [], [], []
+
+ for bfile in tocheck:
+ standin = bfutil.standin(bfile)
+ if inctx(standin, ctx1):
+ if ctx1[standin].data().strip() != bfutil.hashfile(self.wjoin(bfile)):
+ modified.append(bfile)
+ else:
+ clean.append(bfile)
+ else:
+ added.append(bfile)
+ finally:
+ wlock.release()
+
+ for standin in ctx1.manifest():
+ if not bfutil.is_standin(standin):
+ continue
+ bfile = bfutil.split_standin(standin)
+ if not match(bfile):
+ continue
+ if bfile not in bfdirstate:
+ removed.append(bfile)
+ # Handle unknown and ignored differently
+ bfiles = (modified, added, removed, missing, [], [], clean)
+ result = list(result)
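+                    # repo.status() returns (modified, added, removed, deleted,
+                    # unknown, ignored, clean), so indices 4, 5 and 6 below are
+                    # unknown, ignored and clean respectively.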
+ # Unknown files
+ result[4] = [f for f in unknown if repo.dirstate[f] == '?' and not bfutil.is_standin(f)]
+ # Ignored files must be ignored by both the dirstate and bfdirstate
+ result[5] = set(ignored).intersection(set(result[5]))
+ # combine normal files and bfiles
+ normals = [[fn for fn in filelist if not bfutil.is_standin(fn)] for filelist in result]
+ result = [sorted(list1 + list2) for (list1, list2) in zip(normals, bfiles)]
+ else:
+ def toname(f):
+ if bfutil.is_standin(f):
+ return bfutil.split_standin(f)
+ return f
+ result = [[toname(f) for f in items] for items in result]
+
+ if not listunknown:
+ result[4] = []
+ if not listignored:
+ result[5] = []
+ if not listclean:
+ result[6] = []
+ self.bfstatus = True
+ return result
+
+ # This call happens after a commit has occurred. Copy all of the bfiles
+ # into the cache
+ def commitctx(self, *args, **kwargs):
+ node = super(bfiles_repo, self).commitctx(*args, **kwargs)
+ ctx = self[node]
+ for filename in ctx.files():
+ if bfutil.is_standin(filename) and filename in ctx.manifest():
+ realfile = bfutil.split_standin(filename)
+ bfutil.copy_to_cache(self, ctx.node(), realfile)
+
+ return node
+
+ # This call happens before a commit has occurred. The bfile standins
+ # have not had their contents updated (to reflect the hash of their bfile).
+ # Do that here.
+ def commit(self, text="", user=None, date=None, match=None, force=False,
+ editor=False, extra={}):
+ orig = super(bfiles_repo, self).commit
+
+ wlock = repo.wlock()
+ try:
+ # Case 1: user calls commit with no specific files or
+ # include/exclude patterns: refresh and commit everything.
+ if (match is None) or (not match.anypats() and not match.files()):
+ bfiles = bfutil.list_bfiles(self)
+ bfdirstate = bfutil.open_bfdirstate(ui, self)
+ # this only loops through bfiles that exist (not removed/renamed)
+ for bfile in bfiles:
+ if os.path.exists(self.wjoin(bfutil.standin(bfile))):
+ bfutil.update_standin(self, bfutil.standin(bfile))
+ bfdirstate.normal(bfutil.unixpath(bfile))
+ for bfile in bfdirstate:
+ if not os.path.exists(repo.wjoin(bfutil.standin(bfile))):
+ bfdirstate.forget(bfutil.unixpath(bfile))
+ bfdirstate.write()
+
+ return orig(text=text, user=user, date=date, match=match,
+ force=force, editor=editor, extra=extra)
+
+ for file in match.files():
+ if bfutil.is_standin(file):
+ raise util.Abort("Don't commit bfile standin. Commit bfile.")
+
+ # Case 2: user calls commit with specified patterns: refresh any
+ # matching big files.
+ smatcher = bfutil.compose_standin_matcher(self, match)
+ standins = bfutil.dirstate_walk(self.dirstate, smatcher)
+
+ # No matching big files: get out of the way and pass control to
+ # the usual commit() method.
+ if not standins:
+ return orig(text=text, user=user, date=date, match=match,
+ force=force, editor=editor, extra=extra)
+
+ # Refresh all matching big files. It's possible that the commit
+ # will end up failing, in which case the big files will stay
+ # refreshed. No harm done: the user modified them and asked to
+ # commit them, so sooner or later we're going to refresh the
+ # standins. Might as well leave them refreshed.
+ bfdirstate = bfutil.open_bfdirstate(ui, self)
+ for standin in standins:
+ bfile = bfutil.split_standin(standin)
+                    if bfdirstate[bfile] != 'r':
+ bfutil.update_standin(self, standin)
+ bfdirstate.normal(bfutil.unixpath(bfile))
+ else:
+ bfdirstate.forget(bfutil.unixpath(bfile))
+ bfdirstate.write()
+
+ # Cook up a new matcher that only matches regular files or
+ # standins corresponding to the big files requested by the user.
+ # Have to modify _files to prevent commit() from complaining
+ # "not tracked" for big files.
+ bfiles = bfutil.list_bfiles(repo)
+ match = copy.copy(match)
+ orig_matchfn = match.matchfn
+ match._files = [f for f in match._files if f not in bfiles]
+ match._files += sorted(standins)
+
+ def matchfn(f):
+ if orig_matchfn(f):
+ return f not in bfiles
+ else:
+ return f in standins
+
+ match.matchfn = matchfn
+ return orig(text=text, user=user, date=date, match=match,
+ force=force, editor=editor, extra=extra)
+ finally:
+ wlock.release()
+
+ def push(self, remote, force=False, revs=None, newbranch=False):
+ o = bfutil.findoutgoing(repo, remote, force)
+ if o:
+ toupload = set()
+ o = repo.changelog.nodesbetween(o, revs)[0]
+ for n in o:
+ parents = [p for p in repo.changelog.parents(n) if p != node.nullid]
+ ctx = repo[n]
+ files = set(ctx.files())
+ if len(parents) == 2:
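+                        # For a merge, ctx.files() can miss files carried over from a
+                        # parent, so also pick up files missing from the merge manifest
+                        # relative to either parent and files whose manifest entry
+                        # differs from either parent.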
+ mc = ctx.manifest()
+ mp1 = ctx.parents()[0].manifest()
+ mp2 = ctx.parents()[1].manifest()
+ for f in mp1:
+ if f not in mc:
+ files.add(f)
+ for f in mp2:
+ if f not in mc:
+ files.add(f)
+ for f in mc:
+ if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+ files.add(f)
+
+ toupload = toupload.union(set([ctx[f].data().strip() for f in files if bfutil.is_standin(f) and f in ctx]))
+ bfcommands.upload_bfiles(ui, self, remote, toupload)
+ # Mercurial >= 1.6 takes the newbranch argument, try that first.
+ try:
+ return super(bfiles_repo, self).push(remote, force, revs, newbranch)
+ except TypeError:
+ return super(bfiles_repo, self).push(remote, force, revs)
+
+ repo.__class__ = bfiles_repo
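+
+    # In outline, the wrapping above swaps the instance's class for a subclass
+    # that defers to the original implementation via super(). A toy,
+    # self-contained sketch of the same pattern (names purely illustrative):
+    #
+    #     class plainrepo(object):
+    #         def push(self):
+    #             return 'pushed'
+    #
+    #     def wrap(repo):
+    #         class bfrepo(repo.__class__):
+    #             def push(self):
+    #                 # do the bfiles work first, then defer to the original
+    #                 return 'bfiles uploaded; ' + super(bfrepo, self).push()
+    #         repo.__class__ = bfrepo
+    #
+    #     r = plainrepo()
+    #     wrap(r)
+    #     r.push()        # -> 'bfiles uploaded; pushed'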
+
+# Add works by going through the files that the user wanted to add
+# and checking if they should be added as bfiles. Then making a new
+# matcher which matches only the normal files and running the original
+# version of add.
+def override_add(orig, ui, repo, *pats, **opts):
+ bf = opts.pop('bf', None)
+
+ bfsize = opts.pop('bfsize', None)
+ if bfsize:
+ try:
+ bfsize = int(bfsize)
+ except ValueError:
+ raise util.Abort(_('size must be an integer, was %s\n') % bfsize)
+ else:
+ if os.path.exists(repo.wjoin(bfutil.short_name)):
+ bfsize = ui.config(bfutil.long_name, 'size', default='10')
+ if bfsize:
+ try:
+ bfsize = int(bfsize)
+ except ValueError:
+ raise util.Abort(_('bfiles.size must be integer, was %s\n') % bfsize)
+
+ bfmatcher = None
+ if os.path.exists(repo.wjoin(bfutil.short_name)):
+ bfpats = ui.config(bfutil.long_name, 'patterns', default=())
+ if bfpats:
+ bfpats = bfpats.split(' ')
+ bfmatcher = match_.match(repo.root, '', list(bfpats))
+
+ bfnames = []
+ m = cmdutil.match(repo, pats, opts)
+ m.bad = lambda x,y: None
+ wctx = repo[None]
+ for f in repo.walk(m):
+ exact = m.exact(f)
+ bfile = bfutil.standin(f) in wctx
+ nfile = f in wctx
+
+ if exact and bfile:
+ ui.warn(_('%s already a bfile\n') % f)
+ continue
+ # Don't warn the user when they attempt to add a normal tracked file. The normal add code
+ # will do that for us.
+ if exact and nfile:
+ continue
+ if exact or (not bfile and not nfile):
+ if bf or (bfsize and os.path.getsize(repo.wjoin(f)) >= bfsize*1024*1024) \
+ or (bfmatcher and bfmatcher(f)):
+ bfnames.append(f)
+ if ui.verbose or not exact:
+ ui.status(_('adding %s as bfile\n') % m.rel(f))
+
+ bad = []
+ standins = []
+
+    # Need to lock, otherwise there could be a race condition in between when standins
+    # are created and added to the repo.
+ wlock = repo.wlock()
+ try:
+ if not opts.get('dry_run'):
+ bfdirstate = bfutil.open_bfdirstate(ui, repo)
+ for f in bfnames:
+ standinname = bfutil.standin(f)
+ bfutil.write_standin(repo, standinname, hash='', executable=bfutil.get_executable(repo.wjoin(f)))
+ standins.append(standinname)
+ if bfdirstate[bfutil.unixpath(f)] == 'r':
+ bfdirstate.normallookup(bfutil.unixpath(f))
+ else:
+ bfdirstate.add(bfutil.unixpath(f))
+ bfdirstate.write()
+ bad += [bfutil.split_standin(f) for f in bfutil.repo_add(repo, standins) if f in m.files()]
+ finally:
+ wlock.release()
+
+ oldmatch = cmdutil.match
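+    # cmdutil.match is swapped out temporarily so the wrapped add only sees the
+    # non-bfiles, then restored right after orig() returns. (Other overrides in
+    # this module, e.g. override_copy and override_revert, do the restore in a
+    # finally: block so an exception cannot leave the replacement installed.)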
+ manifest = repo[None].manifest()
+ def override_match(repo, pats=[], opts={}, globbed=False, default='relpath'):
+ match = oldmatch(repo, pats, opts, globbed, default)
+ m = copy.copy(match)
+ notbfile = lambda f: not bfutil.is_standin(f) and bfutil.standin(f) not in manifest
+ m._files = [f for f in m._files if notbfile(f)]
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ m.matchfn = lambda f: notbfile(f) and orig_matchfn(f) or None
+ return m
+ cmdutil.match = override_match
+ result = orig(ui, repo, *pats, **opts)
+ cmdutil.match = oldmatch
+
+    return (result == 1 or bad) and 1 or 0
+
+def override_remove(orig, ui, repo, *pats, **opts):
+ wctx = repo[None].manifest()
+ oldmatch = cmdutil.match
+ def override_match(repo, pats=[], opts={}, globbed=False, default='relpath'):
+ match = oldmatch(repo, pats, opts, globbed, default)
+ m = copy.copy(match)
+ notbfile = lambda f: not bfutil.is_standin(f) and bfutil.standin(f) not in wctx
+ m._files = [f for f in m._files if notbfile(f)]
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ m.matchfn = lambda f: orig_matchfn(f) and notbfile(f)
+ return m
+ cmdutil.match = override_match
+ orig(ui, repo, *pats, **opts)
+ cmdutil.match = oldmatch
+
+ after, force = opts.get('after'), opts.get('force')
+ if not pats and not after:
+ raise util.Abort(_('no files specified'))
+ m = cmdutil.match(repo, pats, opts)
+ try:
+ repo.bfstatus = True
+ s = repo.status(match=m, clean=True)
+ finally:
+ repo.bfstatus = False
+ modified, added, deleted, clean = [[f for f in list if bfutil.standin(f) in wctx] for list in [s[0], s[1], s[3], s[6]]]
+
+ def warn(files, reason):
+ for f in files:
+ ui.warn(_('not removing %s: file %s (use -f to force removal)\n')
+ % (m.rel(f), reason))
+
+ if force:
+ remove, forget = modified + deleted + clean, added
+ elif after:
+ remove, forget = deleted, []
+ warn(modified + added + clean, _('still exists'))
+ else:
+ remove, forget = deleted + clean, []
+ warn(modified, _('is modified'))
+ warn(added, _('has been marked for add'))
+
+ for f in sorted(remove + forget):
+ if ui.verbose or not m.exact(f):
+ ui.status(_('removing %s\n') % m.rel(f))
+
+ # Need to lock because standin files are deleted then removed from the repository
+    # and we could race in between.
+ wlock = repo.wlock()
+ try:
+ bfdirstate = bfutil.open_bfdirstate(ui, repo)
+ for f in remove:
+ if not after:
+ os.unlink(repo.wjoin(f))
+ bfdirstate.remove(bfutil.unixpath(f))
+ bfdirstate.write()
+
+ forget = [bfutil.standin(f) for f in forget]
+ remove = [bfutil.standin(f) for f in remove]
+ bfutil.repo_forget(repo, forget)
+ bfutil.repo_remove(repo, remove, unlink=True)
+ finally:
+ wlock.release()
+
+def override_status(orig, ui, repo, *pats, **opts):
+ try:
+ repo.bfstatus = True
+ return orig(ui, repo, *pats, **opts)
+ finally:
+ repo.bfstatus = False
+
+def override_verify(orig, ui, repo, *pats, **opts):
+ bf = opts.pop('bf', False)
+ all = opts.pop('bfa', False)
+ contents = opts.pop('bfc', False)
+
+ result = orig(ui, repo, *pats, **opts)
+ if bf:
+ result = result or bfcommands.verify_bfiles(ui, repo, all, contents)
+ return result
+
+# Override needs to refresh standins so that update's normal merge
+# will go through properly. Then the other update hook (overriding repo.update)
+# will get the new files. Filemerge is also overridden so that the merge
+# will merge standins correctly.
+def override_update(orig, ui, repo, *pats, **opts):
+ bfdirstate = bfutil.open_bfdirstate(ui, repo)
+ s = bfdirstate.status(match_.always(repo.root, repo.getcwd()), [], False, False, False)
+ (unsure, modified, added, removed, missing, unknown, ignored, clean) = s
+
+ # Need to lock between the standins getting updated and their bfiles getting updated
+ wlock = repo.wlock()
+ try:
+ if opts['check']:
+ mod = len(modified) > 0
+ for bfile in unsure:
+ standin = bfutil.standin(bfile)
+ if repo['.'][standin].data().strip() != bfutil.hashfile(repo.wjoin(bfile)):
+ mod = True
+ else:
+ bfdirstate.normal(bfutil.unixpath(bfile))
+ bfdirstate.write()
+ if mod:
+ raise util.Abort(_('uncommitted local changes'))
+ # XXX handle removed differently
+ if not opts['clean']:
+ for bfile in unsure + modified + added:
+ bfutil.update_standin(repo, bfutil.standin(bfile))
+ finally:
+ wlock.release()
+ return orig(ui, repo, *pats, **opts)
+
+# Override filemerge to prompt the user about how they wish to merge bfiles.
+# This will handle identical edits, and copy/rename + edit without prompting the user.
+def override_filemerge(origfn, repo, mynode, orig, fcd, fco, fca):
+ # Use better variable names here. Because this is a wrapper we cannot change
+ # the variable names in the function declaration.
+ fcdest, fcother, fcancestor = fcd, fco, fca
+ if not bfutil.is_standin(orig):
+ return origfn(repo, mynode, orig, fcdest, fcother, fcancestor)
+ else:
+ if not fcother.cmp(fcdest): # files identical?
+ return None
+
+ if fcancestor == fcother: # backwards, use working dir parent as ancestor
+ fcancestor = fcdest.parents()[0]
+
+ if orig != fcother.path():
+ repo.ui.status(_('merging %s and %s to %s\n')
+ % (bfutil.split_standin(orig), bfutil.split_standin(fcother.path()), bfutil.split_standin(fcdest.path())))
+ else:
+ repo.ui.status(_('merging %s\n') % bfutil.split_standin(fcdest.path()))
+
+ if fcancestor.path() != fcother.path() and fcother.data() == fcancestor.data():
+ return 0
+ if fcancestor.path() != fcdest.path() and fcdest.data() == fcancestor.data():
+ repo.wwrite(fcdest.path(), fcother.data(), fcother.flags())
+ return 0
+
+ if repo.ui.promptchoice(_('bfile %s has a merge conflict\n'
+ 'keep (l)ocal or take (o)ther?') % bfutil.split_standin(orig),
+ (_('&Local'), _('&Other')), 0) == 0:
+ return 0
+ else:
+ repo.wwrite(fcdest.path(), fcother.data(), fcother.flags())
+ return 0
+
+# Copy first changes the matchers to match standins instead of bfiles.
+# Then it overrides util.copyfile in that function it checks if the destination
+# bfile already exists. It also keeps a list of copied files so that the bfiles
+# can be copied and the dirstate updated.
+def override_copy(orig, ui, repo, pats, opts, rename=False):
+ # doesn't remove bfile on rename
+ if len(pats) < 2:
+ # this isn't legal, let the original function deal with it
+ return orig(ui, repo, pats, opts, rename)
+
+ def makestandin(relpath):
+ return os.path.join(os.path.relpath('.', repo.getcwd()), bfutil.standin(util.canonpath(repo.root, repo.getcwd(), relpath)))
+
+ fullpats = cmdutil.expandpats(pats)
+ dest = fullpats[-1]
+
+ if os.path.isdir(dest):
+ if not os.path.isdir(makestandin(dest)):
+ os.makedirs(makestandin(dest))
+    # This could copy both bfiles and normal files in one command, but we don't
+    # want that. First replace the matcher to match only normal files and run
+    # the copy, then replace it to match only bfiles and run it again.
+ nonormalfiles = False
+ nobfiles = False
+ oldmatch = cmdutil.match
+ try:
+ manifest = repo[None].manifest()
+ def override_match(repo, pats=[], opts={}, globbed=False, default='relpath'):
+ match = oldmatch(repo, pats, opts, globbed, default)
+ m = copy.copy(match)
+ notbfile = lambda f: not bfutil.is_standin(f) and bfutil.standin(f) not in manifest
+ m._files = [f for f in m._files if notbfile(f)]
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ m.matchfn = lambda f: notbfile(f) and orig_matchfn(f) or None
+ return m
+ cmdutil.match = override_match
+ result = orig(ui, repo, pats, opts, rename)
+ except util.Abort as e:
+ if str(e) != 'no files to copy':
+ raise e
+ else:
+ nonormalfiles = True
+ result = 0
+ finally:
+ cmdutil.match = oldmatch
+
+ # The first rename can cause our current working directory to be removed. In that case
+ # there is nothing left to copy/rename so just quit.
+ try:
+ repo.getcwd()
+ except OSError:
+ return result
+
+ try:
+ # When we call orig below it creates the standins but we don't add them to the dir state
+ # until later so lock during that time.
+ wlock = repo.wlock()
+
+ manifest = repo[None].manifest()
+ def override_match(repo, pats=[], opts={}, globbed=False, default='relpath'):
+ newpats = []
+            # The patterns were previously mangled to add .hgbfiles; strip that back off here.
+            for pat in pats:
+                if match_.patkind(pat) is None and bfutil.short_name in pat:
+                    newpats.append(pat.replace(bfutil.short_name, ''))
+ else:
+ newpats.append(pat)
+ match = oldmatch(repo, newpats, opts, globbed, default)
+ m = copy.copy(match)
+ bfile = lambda f: bfutil.standin(f) in manifest
+ m._files = [bfutil.standin(f) for f in m._files if bfile(f)]
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ m.matchfn = lambda f: bfutil.is_standin(f) and bfile(bfutil.split_standin(f)) and orig_matchfn(bfutil.split_standin(f)) or None
+ return m
+ cmdutil.match = override_match
+ listpats = []
+ for pat in pats:
+            if match_.patkind(pat) is not None:
+ listpats.append(pat)
+ else:
+ listpats.append(makestandin(pat))
+
+ try:
+ origcopyfile = util.copyfile
+ copiedfiles = []
+ def override_copyfile(src, dest):
+ if bfutil.short_name in src and bfutil.short_name in dest:
+ destbfile = dest.replace(bfutil.short_name, '')
+ if not opts['force'] and os.path.exists(destbfile):
+ raise IOError('', _('destination bfile already exists'))
+ copiedfiles.append((src, dest))
+ origcopyfile(src, dest)
+
+ util.copyfile = override_copyfile
+ result += orig(ui, repo, listpats, opts, rename)
+ finally:
+ util.copyfile = origcopyfile
+
+ bfdirstate = bfutil.open_bfdirstate(ui, repo)
+ for (src, dest) in copiedfiles:
+ if bfutil.short_name in src and bfutil.short_name in dest:
+ srcbfile = src.replace(bfutil.short_name, '')
+ destbfile = dest.replace(bfutil.short_name, '')
+ destbfiledir = os.path.dirname(destbfile) or '.'
+ if not os.path.isdir(destbfiledir):
+ os.makedirs(destbfiledir)
+ if rename:
+ os.rename(srcbfile, destbfile)
+ bfdirstate.remove(bfutil.unixpath(os.path.relpath(srcbfile, repo.root)))
+ else:
+ util.copyfile(srcbfile, destbfile)
+ bfdirstate.add(bfutil.unixpath(os.path.relpath(destbfile, repo.root)))
+ bfdirstate.write()
+ except util.Abort as e:
+ if str(e) != 'no files to copy':
+ raise e
+ else:
+ nobfiles = True
+ finally:
+ cmdutil.match = oldmatch
+ wlock.release()
+
+ if nobfiles and nonormalfiles:
+ raise util.Abort(_('no files to copy'))
+
+ return result
+
+# Standins are only updated (to match the hash of bfiles) before commits.
+# Update the standins, then run the original revert (changing the matcher to hit standins
+# instead of bfiles). Based on the resulting standins, update the bfiles. Then return the
+# standins to their proper state.
+def override_revert(orig, ui, repo, *pats, **opts):
+ # Because we put the standins in a bad state (by updating them) and then return them
+ # to a correct state we need to lock to prevent others from changing them in their
+ # incorrect state.
+ wlock = repo.wlock()
+ try:
+ bfdirstate = bfutil.open_bfdirstate(ui, repo)
+ (modified, added, removed, missing, unknown, ignored, clean) = bfutil.bfdirstate_status(bfdirstate, repo, repo['.'].rev())
+ for bfile in modified:
+ bfutil.update_standin(repo, bfutil.standin(bfile))
+
+ oldmatch = cmdutil.match
+ try:
+ ctx = repo[opts.get('rev')]
+ def override_match(repo, pats=[], opts={}, globbed=False, default='relpath'):
+ match = oldmatch(repo, pats, opts, globbed, default)
+ m = copy.copy(match)
+ def tostandin(f):
+ if bfutil.standin(f) in repo[None] or bfutil.standin(f) in ctx:
+ return bfutil.standin(f)
+ return f
+ m._files = [tostandin(f) for f in m._files]
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ def matchfn(f):
+ if bfutil.is_standin(f):
+ return orig_matchfn(bfutil.split_standin(f)) and (f in repo[None] or f in ctx)
+ return orig_matchfn(f)
+ m.matchfn = matchfn
+ return m
+ cmdutil.match = override_match
+ orig(ui, repo, *pats, **opts)
+ finally:
+ cmdutil.match = oldmatch
+ bfcommands.revert_bfiles(ui, repo)
+ for bfile in modified:
+ if os.path.exists(repo.wjoin(bfutil.standin(bfile))) and bfile in repo['.']:
+ bfutil.write_standin(repo, bfutil.standin(bfile), repo['.'][bfile].data().strip(), 'x' in repo['.'][bfile].flags())
+ finally:
+ wlock.release()
+
+def hg_update(orig, repo, node):
+ result = orig(repo, node)
+ # XXX check if it worked first
+ bfcommands.update_bfiles(repo.ui, repo)
+ return result
+
+def hg_clean(orig, repo, node, show_stats=True):
+ result = orig(repo, node, show_stats)
+ bfcommands.update_bfiles(repo.ui, repo)
+ return result
+
+def hg_merge(orig, repo, node, force=None, remind=True):
+ result = orig(repo, node, force, remind)
+ bfcommands.update_bfiles(repo.ui, repo)
+ return result
+
+def override_archive(orig, repo, dest, node, kind, decode=True, matchfn=None,
+ prefix=None, mtime=None, subrepos=None):
+ # No need to lock because we are only reading history and bfile caches
+ # neither of which are modified
+
+ if kind not in archival.archivers:
+ raise util.Abort(_("unknown archive type '%s'") % kind)
+
+ ctx = repo[node]
+
+ # In Mercurial <= 1.5 the prefix is passed to the archiver so try that
+ # if that doesn't work we are probably in Mercurial >= 1.6 where the
+ # prefix is not handled by the archiver
+ try:
+ archiver = archival.archivers[kind](dest, prefix, mtime or ctx.date()[0])
+
+ def write(name, mode, islink, getdata):
+ if matchfn and not matchfn(name):
+ return
+ data = getdata()
+ if decode:
+ data = repo.wwritedata(name, data)
+ archiver.addfile(name, mode, islink, data)
+ except TypeError:
+ if kind == 'files':
+ if prefix:
+ raise util.Abort(_('cannot give prefix when archiving to files'))
+ else:
+ prefix = archival.tidyprefix(dest, kind, prefix)
+
+ def write(name, mode, islink, getdata):
+ if matchfn and not matchfn(name):
+ return
+ data = getdata()
+ if decode:
+ data = repo.wwritedata(name, data)
+ archiver.addfile(prefix + name, mode, islink, data)
+
+ archiver = archival.archivers[kind](dest, mtime or ctx.date()[0])
+
+ if repo.ui.configbool("ui", "archivemeta", True):
+ def metadata():
+ base = 'repo: %s\nnode: %s\nbranch: %s\n' % (
+ hex(repo.changelog.node(0)), hex(node), ctx.branch())
+
+ tags = ''.join('tag: %s\n' % t for t in ctx.tags()
+ if repo.tagtype(t) == 'global')
+ if not tags:
+ repo.ui.pushbuffer()
+ opts = {'template': '{latesttag}\n{latesttagdistance}',
+ 'style': '', 'patch': None, 'git': None}
+ cmdutil.show_changeset(repo.ui, repo, opts).show(ctx)
+ ltags, dist = repo.ui.popbuffer().split('\n')
+ tags = ''.join('latesttag: %s\n' % t for t in ltags.split(':'))
+ tags += 'latesttagdistance: %s\n' % dist
+
+ return base + tags
+
+ write('.hg_archival.txt', 0644, False, metadata)
+
+ for f in ctx:
+ ff = ctx.flags(f)
+ getdata = ctx[f].data
+ if bfutil.is_standin(f):
+ path = bfutil.find_file(repo, getdata().strip())
+ f = bfutil.split_standin(f)
+
+ def getdatafn():
+ with open(path, 'rb') as fd:
+ return fd.read()
+
+ getdata = getdatafn
+ write(f, 'x' in ff and 0755 or 0644, 'l' in ff, getdata)
+ archiver.done()
+
+# If a bfile is modified the change is not reflected in its standin until a commit.
+# cmdutil.bail_if_changed raises an exception if the repo has uncommitted changes.
+# Wrap it to also check if bfiles were changed. This is used by bisect and backout.
+def override_bail_if_changed(orig, repo):
+ orig(repo)
+ repo.bfstatus = True
+ modified, added, removed, deleted = repo.status()[:4]
+ repo.bfstatus = False
+ if modified or added or removed or deleted:
+ raise util.Abort(_('outstanding uncommitted changes'))
+
+# Fetch doesn't use cmdutil.bail_if_changed so override it to add the check
+def override_fetch(orig, ui, repo, *pats, **opts):
+ repo.bfstatus = True
+ modified, added, removed, deleted = repo.status()[:4]
+ repo.bfstatus = False
+ if modified or added or removed or deleted:
+ raise util.Abort(_('outstanding uncommitted changes'))
+ return orig(ui, repo, *pats, **opts)
+
+def override_forget(orig, ui, repo, *pats, **opts):
+ wctx = repo[None].manifest()
+ oldmatch = cmdutil.match
+ def override_match(repo, pats=[], opts={}, globbed=False, default='relpath'):
+ match = oldmatch(repo, pats, opts, globbed, default)
+ m = copy.copy(match)
+ notbfile = lambda f: not bfutil.is_standin(f) and bfutil.standin(f) not in wctx
+ m._files = [f for f in m._files if notbfile(f)]
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ m.matchfn = lambda f: orig_matchfn(f) and notbfile(f)
+ return m
+ cmdutil.match = override_match
+ orig(ui, repo, *pats, **opts)
+ cmdutil.match = oldmatch
+
+ m = cmdutil.match(repo, pats, opts)
+ try:
+ repo.bfstatus = True
+ s = repo.status(match=m, clean=True)
+ finally:
+ repo.bfstatus = False
+ forget = sorted(s[0] + s[1] + s[3] + s[6])
+ forget = [f for f in forget if bfutil.standin(f) in wctx]
+
+ for f in forget:
+ if bfutil.standin(f) not in repo.dirstate and not os.path.isdir(m.rel(bfutil.standin(f))):
+ ui.warn(_('not removing %s: file is already untracked\n')
+ % m.rel(f))
+
+ for f in forget:
+ if ui.verbose or not m.exact(f):
+ ui.status(_('removing %s\n') % m.rel(f))
+
+ # Need to lock because standin files are deleted then removed from the repository
+    # and we could race in between.
+ wlock = repo.wlock()
+ try:
+ bfdirstate = bfutil.open_bfdirstate(ui, repo)
+ for f in forget:
+ bfdirstate.remove(bfutil.unixpath(f))
+ bfdirstate.write()
+ bfutil.repo_remove(repo, [bfutil.standin(f) for f in forget], unlink=True)
+ finally:
+ wlock.release()
+
+def get_outgoing_bfiles(ui, repo, dest=None, **opts):
+ dest = ui.expandpath(dest or 'default-push', dest or 'default')
+ dest, branches = hg.parseurl(dest, opts.get('branch'))
+ revs, checkout = hg.addbranchrevs(repo, repo, branches, opts.get('rev'))
+ if revs:
+ revs = [repo.lookup(rev) for rev in revs]
+
+ # Mercurial <= 1.5 had remoteui in cmdutil, then it moved to hg
+ try:
+ remoteui = cmdutil.remoteui
+ except AttributeError:
+ remoteui = hg.remoteui
+
+ try:
+ remote = hg.repository(remoteui(repo, opts), dest)
+ except error.RepoError:
+ return None
+ o = bfutil.findoutgoing(repo, remote, False)
+ if not o:
+ return None
+ o = repo.changelog.nodesbetween(o, revs)[0]
+ if opts.get('newest_first'):
+ o.reverse()
+
+ toupload = set()
+ for n in o:
+ parents = [p for p in repo.changelog.parents(n) if p != node.nullid]
+ ctx = repo[n]
+ files = set(ctx.files())
+ if len(parents) == 2:
+ mc = ctx.manifest()
+ mp1 = ctx.parents()[0].manifest()
+ mp2 = ctx.parents()[1].manifest()
+ for f in mp1:
+ if f not in mc:
+ files.add(f)
+ for f in mp2:
+ if f not in mc:
+ files.add(f)
+ for f in mc:
+ if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+ files.add(f)
+ toupload = toupload.union(set([f for f in files if bfutil.is_standin(f) and f in ctx]))
+ return toupload
+
+def override_outgoing(orig, ui, repo, dest=None, **opts):
+ orig(ui, repo, dest, **opts)
+
+ if opts.pop('bf', None):
+ toupload = get_outgoing_bfiles(ui, repo, dest, **opts)
+ if toupload is None:
+ ui.status(_('kbfiles: No remote repo\n'))
+ else:
+ ui.status(_('kbfiles to upload:\n'))
+ for file in toupload:
+ ui.status(bfutil.split_standin(file) + '\n')
+ ui.status('\n')
+
+def override_summary(orig, ui, repo, *pats, **opts):
+ orig(ui, repo, *pats, **opts)
+
+ if opts.pop('bf', None):
+ toupload = get_outgoing_bfiles(ui, repo, None, **opts)
+ if toupload is None:
+ ui.status(_('kbfiles: No remote repo\n'))
+ else:
+ ui.status(_('kbfiles: %d to upload\n') % len(toupload))
+
+def override_addremove(orig, ui, repo, *pats, **opts):
+    # Check if the parent or child has bfiles; if they do, don't allow addremove.
+    # If there is a symlink in the manifest then getting the manifest throws an exception;
+    # catch it and let addremove deal with it. This happens in Mercurial's test
+    # test-addremove-symlink.
+ try:
+ manifesttip = set(repo['tip'].manifest())
+ except util.Abort:
+ manifesttip = set()
+ try:
+ manifestworking = set(repo[None].manifest())
+ except util.Abort:
+ manifestworking = set()
+
+ # Manifests are only iterable so turn them into sets then union
+ for file in manifesttip.union(manifestworking):
+ if file.startswith(bfutil.short_name):
+ raise util.Abort(_('addremove cannot be run on a repo with bfiles'))
+
+ return orig(ui, repo, *pats, **opts)
+
+def uisetup(ui):
+ # Disable auto-status for some commands which assume that all
+ # files in the result are under Mercurial's control
+
+ entry = extensions.wrapcommand(commands.table, 'add', override_add)
+ addopt = [('', 'bf', None, _('add as bfile')),
+ ('', 'bfsize', '', _('add all files above this size (in megabytes) as bfiles (default: 10)'))]
+ entry[1].extend(addopt)
+
+ entry = extensions.wrapcommand(commands.table, 'addremove', override_addremove)
+ entry = extensions.wrapcommand(commands.table, 'remove', override_remove)
+ entry = extensions.wrapcommand(commands.table, 'forget', override_forget)
+ entry = extensions.wrapcommand(commands.table, 'status', override_status)
+
+ entry = extensions.wrapcommand(commands.table, 'verify', override_verify)
+ verifyopt = [('', 'bf', None, _('verify bfiles')),
+ ('', 'bfa', None, _('verify all revisions of bfiles not just current')),
+ ('', 'bfc', None, _('verify bfile contents not just existence'))]
+ entry[1].extend(verifyopt)
+
+ entry = extensions.wrapcommand(commands.table, 'outgoing', override_outgoing)
+ outgoingopt = [('', 'bf', None, _('display outgoing bfiles'))]
+ entry[1].extend(outgoingopt)
+ entry = extensions.wrapcommand(commands.table, 'summary', override_summary)
+ summaryopt = [('', 'bf', None, _('display outgoing bfiles'))]
+ entry[1].extend(summaryopt)
+
+ entry = extensions.wrapcommand(commands.table, 'update', override_update)
+ entry = extensions.wrapfunction(filemerge, 'filemerge', override_filemerge)
+ entry = extensions.wrapfunction(cmdutil, 'copy', override_copy)
+
+ # Backout calls revert so we need to override both the command and the function
+ entry = extensions.wrapcommand(commands.table, 'revert', override_revert)
+ entry = extensions.wrapfunction(commands, 'revert', override_revert)
+
+ # clone uses hg._update instead of hg.update even though they are the
+    # same function... so wrap both of them
+ extensions.wrapfunction(hg, 'update', hg_update)
+ extensions.wrapfunction(hg, '_update', hg_update)
+ extensions.wrapfunction(hg, 'clean', hg_clean)
+ extensions.wrapfunction(hg, 'merge', hg_merge)
+
+ extensions.wrapfunction(archival, 'archive', override_archive)
+ extensions.wrapfunction(cmdutil, 'bail_if_changed', override_bail_if_changed)
+
+ for name, module in extensions.extensions():
+ if name == 'fetch':
+ extensions.wrapcommand(getattr(module, 'cmdtable'), 'fetch', override_fetch)
+
+
|
This file's diff was not loaded because this changeset is very large.
|
|
@@ -0,0 +1,115 @@ + '''HTTP-based store.'''
+
+import urlparse
+import urllib2
+
+from mercurial import util, url as url_
+from mercurial.i18n import _
+
+import bfutil, basestore
+
+class httpstore(basestore.basestore):
+ """A store accessed via HTTP"""
+ def __init__(self, ui, repo, url):
+ url = bfutil.urljoin(url, 'bfile')
+ super(httpstore, self).__init__(ui, repo, url)
+ self.rawurl, self.path = urlparse.urlsplit(self.url)[1:3]
+ (baseurl, authinfo) = url_.getauthinfo(self.url)
+ self.opener = url_.opener(self.ui, authinfo)
+
+ def put(self, source, hash):
+ self.sendfile(source, hash)
+ self.ui.debug('put %s to remote store\n' % source)
+
+ def sendfile(self, filename, hash):
+ if self._verify(hash):
+ return
+
+ self.ui.debug('httpstore.sendfile(%s, %s)\n' % (filename, hash))
+ baseurl, authinfo = url_.getauthinfo(self.url)
+        fd = open(filename, 'rb')
+        try:
+ request = urllib2.Request(bfutil.urljoin(baseurl, hash))
+ request.add_data(fd.read())
+ try:
+ url = self.opener.open(request)
+ self.ui.note(_('[OK] %s/%s\n') % (self.rawurl, url.geturl()))
+ except urllib2.HTTPError, e:
+ raise util.Abort(_('unable to POST: %s\n') % e.msg)
+ except Exception, e:
+ raise util.Abort(_('%s') % e)
+ finally:
+ fd.close()
+
+ def _getfile(self, tmpfile, filename, hash):
+ (baseurl, authinfo) = url_.getauthinfo(self.url)
+ url = bfutil.urljoin(baseurl, hash)
+ try:
+ request = urllib2.Request(url)
+ infile = self.opener.open(request)
+ except urllib2.HTTPError, err:
+ detail = _("HTTP error: %s %s") % (err.code, err.msg)
+ raise basestore.StoreError(filename, hash, url, detail)
+ except urllib2.URLError, err:
+ # This usually indicates a connection problem, so don't
+ # keep trying with the other files... they will probably
+ # all fail too.
+ reason = err[0][1] # assumes err[0] is a socket.error
+ raise util.Abort('%s: %s' % (baseurl, reason))
+ return bfutil.copy_and_hash(bfutil.blockstream(infile), tmpfile)
+
+ def _verify(self, hash):
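+        # Existence check without downloading the file: send the hash with a
+        # 'SHA1-Request' header and treat a matching 'Content-SHA1' response
+        # header as proof that the store already has it.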
+ baseurl, authinfo = url_.getauthinfo(self.url)
+ store_path = bfutil.urljoin(baseurl, hash)
+ request = urllib2.Request(store_path)
+ request.add_header('SHA1-Request', hash)
+ try:
+ url = self.opener.open(request)
+ if 'Content-SHA1' in url.info() and hash == url.info()['Content-SHA1']:
+ return True
+ else:
+ return False
+ except:
+ return False
+
+ def _verifyfile(self, cctx, cset, contents, standin, verified):
+ baseurl, authinfo = url_.getauthinfo(self.url)
+ filename = bfutil.split_standin(standin)
+ if not filename:
+ return False
+ fctx = cctx[standin]
+ key = (filename, fctx.filenode())
+ if key in verified:
+ return False
+
+ expect_hash = fctx.data()[0:40]
+ store_path = bfutil.urljoin(baseurl, expect_hash)
+ verified.add(key)
+
+ request = urllib2.Request(store_path)
+ request.add_header('SHA1-Request',expect_hash)
+ try:
+ url = self.opener.open(request)
+ if 'Content-SHA1' in url.info():
+ rhash = url.info()['Content-SHA1']
+ if rhash == expect_hash:
+ return False
+ else:
+ self.ui.warn(
+ _('changeset %s: %s: contents differ\n (%s)\n')
+ % (cset, filename, store_path))
+ return True # failed
+ else:
+ self.ui.warn(_('remote did not send a hash, '
+ 'it probably does not understand this protocol\n'))
+ return False
+ except urllib2.HTTPError, e:
+ if e.code == 404:
+ self.ui.warn(
+ _('changeset %s: %s missing\n (%s)\n')
+ % (cset, filename, store_path))
+ return True # failed
+ else:
+                raise util.Abort(_('check failed, unexpected response '
+                                   'status: %d: %s') % (e.code, e.msg))
+
|
|
@@ -0,0 +1,56 @@ + '''Store class for local filesystem.'''
+
+import os
+
+from mercurial import util
+from mercurial.i18n import _
+import bfutil, basestore
+
+class localstore(basestore.basestore):
+ '''Because there is a system wide cache, the local store always uses that cache.
+ Since the cache is updated elsewhere, we can just read from it here as if it were the store.'''
+
+ def __init__(self, ui, repo, url):
+ url = os.path.join(url, '.hg', bfutil.long_name)
+ super(localstore, self).__init__(ui, repo, util.expandpath(url))
+
+    def put(self, source, hash):
+        '''Any file that is put must already be in the system-wide cache, so do nothing.'''
+        return
+
+ def _getfile(self, tmpfile, filename, hash):
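+        # Nothing is copied into tmpfile here: the path of the file already
+        # sitting in the system-wide cache is returned directly.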
+ if bfutil.in_system_cache(self.ui, hash):
+ return bfutil.system_cache_path(self.ui, hash)
+ raise basestore.StoreError(filename, hash, '', _("Can't get file locally"))
+
+ def _verifyfile(self, cctx, cset, contents, standin, verified):
+ filename = bfutil.split_standin(standin)
+ if not filename:
+ return False
+ fctx = cctx[standin]
+ key = (filename, fctx.filenode())
+ if key in verified:
+ return False
+
+ expect_hash = fctx.data()[0:40]
+ verified.add(key)
+ if not bfutil.in_system_cache(self.ui, expect_hash):
+ self.ui.warn(
+                _('changeset %s: %s missing\n'
+                  ' (%s)\n')
+                % (cset, filename, expect_hash))
+ return True # failed
+
+ if contents:
+ store_path = bfutil.system_cache_path(self.ui, expect_hash)
+ actual_hash = bfutil.hashfile(store_path)
+ if actual_hash != expect_hash:
+ self.ui.warn(
+ _('changeset %s: %s: contents differ\n'
+ ' (%s:\n'
+ ' expected hash %s,\n'
+ ' but got %s)\n')
+ % (cset, filename,
+ store_path, expect_hash, actual_hash))
+ return True # failed
+ return False
|
Diffs for 7 more files were not loaded because this changeset is very large.
|
@@ -0,0 +1,13 @@ + % test with bfiles
+hg init
+hg add --bf foo
+hg addremove
+hg commit -m 'added foo'
+hg addremove
+
+% test without bfiles
+hg init
+hg add foo
+hg addremove
+hg commit -m 'added foo'
+hg addremove
|
Diffs for 34 more files were not loaded because this changeset is very large.
|
|
Diffs for 22 more files were not loaded because this changeset is very large.