Kiln » TortoiseHg » TortoiseHg
Clone URL:  
msgfmt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#! /usr/bin/env python # -*- coding: iso-8859-1 -*- # Written by Martin v. Loewis <loewis@informatik.hu-berlin.de> # # Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless # translation service (PTS) of Zope # # Fixed some bugs and updated to support msgctxt # by Hanno Schlichting <hanno@hannosch.info> """Generate binary message catalog from textual translation description. This program converts a textual Uniforum-style message catalog (.po file) into a binary GNU catalog (.mo file). This is essentially the same function as the GNU msgfmt program, however, it is a simpler implementation. This file was taken from Python-2.3.2/Tools/i18n and altered in several ways. Now you can simply use it from another python module: from msgfmt import Msgfmt mo = Msgfmt(po).get() where po is path to a po file as string, an opened po file ready for reading or a list of strings (readlines of a po file) and mo is the compiled mo file as binary string. Exceptions: * IOError if the file couldn't be read * msgfmt.PoSyntaxError if the po file has syntax errors """ import struct import array from cStringIO import StringIO __version__ = "1.1-pythongettext" class PoSyntaxError(Exception): """ Syntax error in a po file """ def __init__(self, msg): self.msg = msg def __str__(self): return 'Po file syntax error: %s' % self.msg class Msgfmt: """ """ def __init__(self, po, name='unknown'): self.po = po self.name = name self.messages = {} self.openfile = False def readPoData(self): """ read po data from self.po and return an iterator """ output = [] if isinstance(self.po, str): output = open(self.po, 'rb') elif isinstance(self.po, file): self.po.seek(0) self.openfile = True output = self.po elif isinstance(self.po, list): output = self.po if not output: raise ValueError, "self.po is invalid! %s" % type(self.po) return output def add(self, context, id, str, fuzzy): "Add a non-empty and non-fuzzy translation to the dictionary." if str and not fuzzy: # The context is put before the id and separated by a EOT char. if context: id = context + '\x04' + id self.messages[id] = str def generate(self): "Return the generated output." keys = self.messages.keys() # the keys are sorted in the .mo file keys.sort() offsets = [] ids = strs = '' for id in keys: # For each string, we need size and file offset. Each string is # NUL terminated; the NUL does not count into the size. offsets.append((len(ids), len(id), len(strs), len(self.messages[id]))) ids += id + '\0' strs += self.messages[id] + '\0' output = '' # The header is 7 32-bit unsigned integers. We don't use hash tables, # so the keys start right after the index tables. keystart = 7*4+16*len(keys) # and the values start after the keys valuestart = keystart + len(ids) koffsets = [] voffsets = [] # The string table first has the list of keys, then the list of values. # Each entry has first the size of the string, then the file offset. for o1, l1, o2, l2 in offsets: koffsets += [l1, o1+keystart] voffsets += [l2, o2+valuestart] offsets = koffsets + voffsets # Even though we don't use a hashtable, we still set its offset to be # binary compatible with the gnu gettext format produced by: # msgfmt file.po --no-hash output = struct.pack("Iiiiiii", 0x950412deL, # Magic 0, # Version len(keys), # # of entries 7*4, # start of key index 7*4+len(keys)*8, # start of value index 0, keystart) # size and offset of hash table output += array.array("i", offsets).tostring() output += ids output += strs return output def get(self): """ """ self.read() # Compute output return self.generate() def read(self, header_only=False): """ """ ID = 1 STR = 2 CTXT = 3 section = None fuzzy = 0 msgid = msgstr = msgctxt = '' # Parse the catalog lno = 0 for l in self.readPoData(): lno += 1 # If we get a comment line after a msgstr or a line starting with # msgid or msgctxt, this is a new entry if section == STR and (l[0] == '#' or (l[0] == 'm' and (l.startswith('msgctxt') or l.startswith('msgid')))): self.add(msgctxt, msgid, msgstr, fuzzy) section = None msgctxt = '' fuzzy = 0 # If we only want the header we stop after the first message if header_only: break # Record a fuzzy mark if l[:2] == '#,' and 'fuzzy' in l: fuzzy = 1 # Skip comments if l[0] == '#': continue # Now we are in a msgctxt section elif l[0] == 'm': if l.startswith('msgctxt'): section = CTXT l = l[7:] msgctxt = '' # Now we are in a msgid section, output previous section elif l.startswith('msgid') and not l.startswith('msgid_plural'): if section == STR: self.add(msgid, msgstr, fuzzy) section = ID l = l[5:] msgid = msgstr = '' is_plural = False # This is a message with plural forms elif l.startswith('msgid_plural'): if section != ID: print >> sys.stderr, 'msgid_plural not preceeded by msgid on %s:%d' %\ (infile, lno) sys.exit(1) l = l[12:] msgid += '\0' # separator of singular and plural is_plural = True # Now we are in a msgstr section elif l.startswith('msgstr'): section = STR if l.startswith('msgstr['): if not is_plural: print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\ (infile, lno) sys.exit(1) l = l.split(']', 1)[1] if msgstr: msgstr += '\0' # Separator of the various plural forms else: if is_plural: print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\ (infile, lno) sys.exit(1) l = l[6:] # Skip empty lines l = l.strip() if not l: continue # XXX: Does this always follow Python escape semantics? try: l = eval(l) except Exception, msg: raise PoSyntaxError('%s (line %d of po file %s): \n%s' % (msg, lno, self.name, l)) if section == CTXT: msgctxt += l elif section == ID: msgid += l elif section == STR: msgstr += l else: raise PoSyntaxError('error in line %d of po file %s' % (lno, self.name)) # Add last entry if section == STR: self.add(msgctxt, msgid, msgstr, fuzzy) if self.openfile: self.po.close() def getAsFile(self): return StringIO(self.get())