|
# Copyright (C) 2009-2010 by Fog Creek Software. All rights reserved.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
import difflib
import re
from pygments import highlight
from pygments.lexers import get_lexer_for_filename, guess_lexer_for_filename, TextLexer
from pygments.formatters import HtmlFormatter
# Map file extensions pygments does not know onto equivalent ones it
# does (e.g. .resx/.csproj files are really XML), so an appropriate
# lexer can be found by extension.
EXTENSION_MAP = {'resx': 'xml',
                 'csproj': 'xml',
                 'was': 'vb',
                 'vbs': 'vb',
                 'fbp5': 'xml',
                 'xul': 'xml',
                 'ipp': 'cpp',
                 'jsm': 'js'}
# Maximum number of characters of a single line that will be
# highlighted; longer lines are truncated before lexing.
LINE_MAX = 20000
def ensurenewline(s):
    """Return *s* guaranteed to end with a newline character."""
    if s.endswith('\n'):
        return s
    return s + '\n'
def tweak(filename, extension_map=None):
    """Change filename to a known extension, if applicable.

    Strips any directory components and remaps unusual extensions
    (via EXTENSION_MAP, or *extension_map* when given) onto ones
    pygments recognizes.  A name with no extension is returned
    unchanged (basename only) instead of raising ValueError as the
    previous implementation did.
    """
    if extension_map is None:
        extension_map = EXTENSION_MAP
    basename = filename.split('/')[-1]
    if '.' not in basename:
        # Nothing to remap; the old rsplit/unpack crashed on this case.
        return basename
    (stem, extension) = basename.rsplit('.', 1)
    return stem + '.' + extension_map.get(extension, extension)
def lexer(filename, content=None):
    """Select an appropriate pygments lexer based on the filename.

    When *content* is supplied, pygments is allowed to analyse it to
    disambiguate between lexers claiming the same extension.  Falls
    back to a plain-text lexer when no lexer can be determined.
    """
    try:
        if content:
            l = guess_lexer_for_filename(tweak(filename), content, stripnl=False)
        else:
            l = get_lexer_for_filename(tweak(filename), stripnl=False)
    except Exception:
        # No matching lexer (pygments raises ClassNotFound) or a
        # malformed filename: degrade to plain text.  Was a bare
        # `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        l = TextLexer(stripnl=False)
    # Make whitespace visible in the highlighted output.
    l.add_filter('whitespace', spaces=True, wstokentype=False)
    return l
class IntralineHtmlFormatter(HtmlFormatter):
    """HtmlFormatter that additionally marks intraline diff changes.

    `ranges` is a list of (start, end) character offsets into the text
    being highlighted; token text falling inside any range gets an
    extra ' ch' CSS class appended so changed words within a diff line
    can be styled differently.
    """
    # True while the tokens currently being yielded fall inside one of
    # self.ranges; read by _get_css_class.  NOTE(review): this works
    # only because HtmlFormatter consumes _split_change_markers lazily,
    # so the flag is toggled in lockstep with token formatting.
    in_change = False
    # Class-level default; overridden per instance by __init__ when a
    # truthy `ranges` argument is passed.  (Never mutated, so the
    # shared class-level list is harmless here.)
    ranges = []
    def __init__(self, ranges=None, *args, **kw):
        # Only override the class default for a non-empty range list.
        if ranges:
            self.ranges = ranges
        HtmlFormatter.__init__(self, *args, **kw)
    def _split_change_markers(self, tokensource):
        '''Pre-process the token stream before it is formatted, to mark the tokens that should be highlighted for intraline diffs.

        Splits each token at the boundaries of self.ranges, toggling
        self.in_change around the pieces that fall inside a range.
        '''
        ranges = self.ranges or []
        # Character offset of the current piece within the whole text.
        pos = 0
        for ttype, value in tokensource:
            # A token may span several lines; process one line at a time.
            for value in value.splitlines(True):
                l = len(value)
                range = None  # NOTE(review): unused (and shadows the builtin)
                # All ranges overlapping the span [pos, pos + l].
                rr = [r for r in ranges if (r[0] <= pos <= r[1]) or (pos <= r[0] <= r[1] <= pos + l) or (r[0] <= pos + l <= r[1])]
                if not rr:
                    # No change touches this piece; pass it through.
                    yield ttype, value
                    pos += l
                    continue
                last = None
                for r in rr:
                    if r[0] <= pos:
                        # r starts at or before token
                        if r[1] <= pos + l:
                            # range covers prefix of token
                            self.in_change = True
                            i = r[1] - pos
                            yield ttype, value[:i]
                            self.in_change = False
                        else:
                            # range covers whole token
                            self.in_change = True
                            yield ttype, value
                            self.in_change = False
                    else:
                        # r starts in the middle of the token; first emit
                        # the unchanged gap since the previous range.
                        i = last[1] - pos if last else 0
                        j = r[0] - pos
                        yield ttype, value[i:j]
                        if r[1] <= pos + l:
                            # range covers middle chunk
                            self.in_change = True
                            i = r[0] - pos
                            j = r[1] - pos
                            yield ttype, value[i:j]
                            self.in_change = False
                        else:
                            # range covers suffix of token
                            self.in_change = True
                            i = r[0] - pos
                            yield ttype, value[i:]
                            self.in_change = False
                    last = r
                # Emit any unchanged tail after the final range.
                if last[1] <= pos + l:
                    i = last[1] - pos
                    yield ttype, value[i:]
                pos += l
    def _format_lines(self, tokensource):
        # Interpose the change-splitting generator so in_change is
        # accurate at the moment each token's CSS class is computed.
        return super(IntralineHtmlFormatter, self)._format_lines(self._split_change_markers(tokensource))
    def _get_css_class(self, ttype):
        # Append ' ch' (changed) to the normal class while inside a range.
        return super(IntralineHtmlFormatter, self)._get_css_class(ttype) + (' ch' if self.in_change else '')
def highlighted(lex, code, ranges=None):
    """Highlight *code* with *lex*, marking intraline *ranges* changed."""
    formatter = IntralineHtmlFormatter(ranges, nowrap=True)
    return highlight(code, lex, formatter)
def highlight_patch(lex, lines, ranges=None):
    """Highlight the body of one diff hunk, preserving line markers.

    Each entry of *lines* starts with its diff marker character
    ('-', '+', ' ', or '\\'); the remainder is the line body, which is
    truncated, highlighted as a whole, then recombined with its marker.
    """
    pairs = [(line[0], ensurenewline(line[1:LINE_MAX])) for line in lines]
    # "\ No newline at end of file" markers carry no source text;
    # substitute a bare newline before feeding the highlighter.
    pairs = [(marker, '\n' if marker == '\\' else body)
             for (marker, body) in pairs]
    code = ''.join(body for (_, body) in pairs)
    rendered = highlighted(lex, code, ranges).splitlines(True)
    out = []
    for idx, (marker, body) in enumerate(pairs):
        if idx >= len(rendered):
            # Highlighter produced fewer lines; keep the raw body.
            out.append(marker + body)
        elif marker == '\\':
            # Restore the canonical no-newline message.
            out.append(marker + ' No newline at end of file\n')
        else:
            out.append(marker + rendered[idx])
    return ''.join(out)
# returns a list of ranges (a, b), marking that characters a:b in the patch are changed.
def intraline_diff(patch):
    """Compute intraline change ranges for one diff hunk.

    *patch* is a list of diff body lines, each prefixed with '-', '+',
    or some other character (context).  Returns a sorted list of
    (start, end) character offsets into the concatenation of the lines
    with their prefix characters stripped — the same string that
    highlight_patch feeds to pygments.
    """
    removed_lines = []  # bodies of the '-' lines in the current run
    added_lines = []    # bodies of the '+' lines in the current run
    ranges = []
    l = 0               # offset into the prefix-stripped patch text
    # The trailing ' ' sentinel guarantees a final context line, so the
    # last run of -/+ lines is always flushed.
    for line in patch + [' ']:
        if line[0] == '-':
            removed_lines.append(line[1:])
        elif line[0] == '+':
            added_lines.append(line[1:])
        else:
            if added_lines or removed_lines:
                rtotal = sum(len(s) for s in removed_lines)
                atotal = sum(len(s) for s in added_lines)
                # split the diff text into whole words and individual non-word characters
                removed_words = [w for w in re.split(r'(\w+|\W)', ''.join(removed_lines)) if w]
                added_words = [w for w in re.split(r'(\w+|\W)', ''.join(added_lines)) if w]
                # Start offsets of the removed and added blocks in the
                # stripped patch: the '-' lines of a run precede the
                # '+' lines — assumes standard unified-diff grouping
                # within a hunk.
                removed, added = l, l + rtotal
                seq = difflib.SequenceMatcher();
                seq.set_seqs(removed_words, added_words)
                # find the matching words of each string, using the ranges in each opcode.
                # 'equal' action is for non-changed text; otherwise, mark the range as changed.
                for (action, r1, r2, a1, a2) in seq.get_opcodes():
                    ac = ''.join(added_words[a1:a2])
                    rc = ''.join(removed_words[r1:r2])
                    a = len(ac)
                    r = len(rc)
                    added += a
                    removed += r
                    if action == 'equal':
                        continue
                    if a != 0:
                        ranges.append((added - a, added))
                    if r != 0:
                        ranges.append((removed - r, removed))
                # Skip past the -/+ run just processed.
                l += atotal + rtotal
                removed_lines = []
                added_lines = []
            # Advance past this context line (minus its prefix char).
            l += len(line) - 1
    return sorted(ranges)
def format(filename, diff):
    """Return *diff* (a unified diff for *filename*) as highlighted HTML.

    Splits the diff at '@@' hunk headers (passed through untouched)
    and runs each hunk body through the intraline highlighter.
    Returns None for an empty diff.
    """
    if not diff:
        return None
    # Strip carriage returns at the byte level, then restore unicode
    # lines if that is what we were given.
    if isinstance(diff, unicode):
        diff_asc = diff.encode('utf-8')
    else:
        diff_asc = diff
    diff_asc = diff_asc.replace('\r', '')
    lines = diff_asc.splitlines(True)
    if isinstance(diff, unicode):
        lines = [l.decode('utf-8') for l in lines]
    lex = lexer(filename)
    formatted = []
    patch = []
    for line in lines:
        if line.startswith(u'@@'):
            # Flush the hunk accumulated before this header.
            # (Was `extend`, which appended the string char-by-char;
            # `append` produces the identical join but is the intent.)
            if patch:
                formatted.append(highlight_patch(lex, patch, intraline_diff(patch)))
            formatted.append(line)
            patch = []
        else:
            patch.append(line)
    if patch:
        formatted.append(highlight_patch(lex, patch, intraline_diff(patch)))
    return ''.join(formatted)
def format_diffs(diffs):
    """Attach a 'formatted_diff' entry to every diff dict in *diffs*."""
    for entry in diffs:
        entry['formatted_diff'] = format(entry['file']['name'], entry['diff'])
def format_file(filename, contents):
    """Highlight a whole file's contents, truncating overlong lines."""
    normalized = contents.replace('\r', '')
    truncated = [line[:LINE_MAX] for line in normalized.split('\n')]
    return highlighted(lexer(filename), '\n'.join(truncated))
|
Loading...