python-docs-venv/lib/python3.11/site-packages/babel/messages/mofile.py - datasketches-python - Git at Google

 """
     babel.messages.mofile
     ~~~~~~~~~~~~~~~~~~~~~

     Reading and writing of files in the ``gettext`` MO (machine object) format.

     :copyright: (c) 2013-2023 by the Babel Team.
     :license: BSD, see LICENSE for more details.
 """
 from __future__ import annotations

 import array
 import struct
 from typing import TYPE_CHECKING

 from babel.messages.catalog import Catalog, Message

 if TYPE_CHECKING:
     from _typeshed import SupportsRead, SupportsWrite

 LE_MAGIC: int = 0x950412de
 BE_MAGIC: int = 0xde120495


 def read_mo(fileobj: SupportsRead[bytes]) -> Catalog:
     """Read a binary MO file from the given file-like object and return a
     corresponding `Catalog` object.

     :param fileobj: the file-like object to read the MO file from; its whole
                     content is read in one call and must be ``bytes``
     :return: a new `Catalog` populated with the messages found in the file
     :raises OSError: if the magic number is not recognised, or if the header,
                      the index tables or a string offset point outside of
                      the data that was read

     :note: The implementation of this function is heavily based on the
            ``GNUTranslations._parse`` method of the ``gettext`` module in the
            standard library.
     """
     catalog = Catalog()
     headers = {}

     filename = getattr(fileobj, 'name', '')

     buf = fileobj.read()
     buflen = len(buf)
     unpack_from = struct.unpack_from

     # The first 32-bit word is the magic number; it tells us the byte order
     # used for every other integer in the file.
     try:
         magic = unpack_from('<I', buf, 0)[0]
     except struct.error as exc:
         raise OSError(0, 'File is corrupt', filename) from exc

     if magic == LE_MAGIC:
         header_fmt, ii = '<4I', '<II'
     elif magic == BE_MAGIC:
         header_fmt, ii = '>4I', '>II'
     else:
         raise OSError(0, 'Bad magic number', filename)

     # Next come four more words: format version (unused here), number of
     # messages, and the offsets of the original/translation index tables.
     try:
         _version, msgcount, origidx, transidx = unpack_from(header_fmt, buf, 4)
     except struct.error as exc:
         raise OSError(0, 'File is corrupt', filename) from exc

     # Now put all messages from the .mo file buffer into the catalog
     # dictionary
     for _i in range(msgcount):
         # Each index entry is a (length, offset) pair.  A truncated index
         # table is reported the same way as any other corruption.
         try:
             mlen, moff = unpack_from(ii, buf, origidx)
             tlen, toff = unpack_from(ii, buf, transidx)
         except struct.error as exc:
             raise OSError(0, 'File is corrupt', filename) from exc
         mend = moff + mlen
         tend = toff + tlen
         # Strict comparison: presumably this leaves room for the NUL byte
         # that terminates every string in the file.
         if not (mend < buflen and tend < buflen):
             raise OSError(0, 'File is corrupt', filename)
         msg = buf[moff:mend]
         tmsg = buf[toff:tend]

         # See if we're looking at GNU .mo conventions for metadata
         if mlen == 0:
             # Catalog description: "Key: value" lines; a line without a
             # colon continues the value of the previous key.
             lastkey = None
             for item in tmsg.splitlines():
                 item = item.strip()
                 if not item:
                     continue
                 if b':' in item:
                     key, value = item.split(b':', 1)
                     lastkey = key = key.strip().lower()
                     headers[key] = value.strip()
                 elif lastkey:
                     headers[lastkey] += b'\n' + item

         if b'\x04' in msg:  # context
             # Split on the first separator only, so that a stray EOT byte
             # later in the id cannot break the two-way unpacking.
             # NOTE(review): the context stays ``bytes`` while ids and
             # strings are decoded below -- confirm that callers expect this.
             ctxt, msg = msg.split(b'\x04', 1)
         else:
             ctxt = None

         charset = catalog.charset
         if b'\x00' in msg:  # plural forms
             msg = msg.split(b'\x00')
             tmsg = tmsg.split(b'\x00')
             if charset:
                 msg = [x.decode(charset) for x in msg]
                 tmsg = [x.decode(charset) for x in tmsg]
         elif charset:
             msg = msg.decode(charset)
             tmsg = tmsg.decode(charset)
         catalog[msg] = Message(msg, tmsg, context=ctxt)

         # advance to next entry in the seek tables
         origidx += 8
         transidx += 8

     catalog.mime_headers = headers.items()
     return catalog


 def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = False) -> None:
     """Write a catalog to the specified file-like object using the GNU MO file
     format.

     The file is always written with little-endian, 32-bit unsigned fields,
     independent of the byte order and ``int`` size of the running platform.

     >>> from babel.messages import Catalog
     >>> from gettext import GNUTranslations
     >>> from io import BytesIO

     >>> catalog = Catalog(locale='en_US')
     >>> catalog.add('foo', 'Voh')
     <Message ...>
     >>> catalog.add(('bar', 'baz'), ('Bahr', 'Batz'))
     <Message ...>
     >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
     <Message ...>
     >>> catalog.add('Fizz', '')
     <Message ...>
     >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
     <Message ...>
     >>> buf = BytesIO()

     >>> write_mo(buf, catalog)
     >>> x = buf.seek(0)
     >>> translations = GNUTranslations(fp=buf)
     >>> translations.gettext('foo')
     'Voh'
     >>> translations.ngettext('bar', 'baz', 1)
     'Bahr'
     >>> translations.ngettext('bar', 'baz', 2)
     'Batz'
     >>> translations.gettext('fuz')
     'fuz'
     >>> translations.gettext('Fizz')
     'Fizz'
     >>> translations.gettext('Fuzz')
     'Fuzz'
     >>> translations.gettext('Fuzzes')
     'Fuzzes'

     :param fileobj: the file-like object to write to
     :param catalog: the `Catalog` instance
     :param use_fuzzy: whether translations marked as "fuzzy" should be included
                       in the output
     """
     messages = list(catalog)
     # The first entry is always kept; the rest are dropped when they have no
     # translation or are fuzzy (unless fuzzy output was requested).
     messages[1:] = [m for m in messages[1:]
                     if m.string and (use_fuzzy or not m.fuzzy)]
     messages.sort()

     charset = catalog.charset
     # Encoded, NUL-terminated ids/strings are collected in lists and joined
     # once at the end instead of growing a bytes object entry by entry.
     id_chunks = []
     str_chunks = []
     offsets = []
     id_pos = str_pos = 0

     for message in messages:
         # For each string, we need size and file offset.  Each string is NUL
         # terminated; the NUL does not count into the size.
         if message.pluralizable:
             msgid = b'\x00'.join(part.encode(charset) for part in message.id)
             # An untranslated plural form falls back to the singular id for
             # index 0 and to the plural id for every other index.
             msgstr = b'\x00'.join(
                 (string or message.id[min(idx, 1)]).encode(charset)
                 for idx, string in enumerate(message.string)
             )
         else:
             msgid = message.id.encode(charset)
             msgstr = message.string.encode(charset)
         if message.context:
             msgid = b'\x04'.join([message.context.encode(charset), msgid])
         offsets.append((id_pos, len(msgid), str_pos, len(msgstr)))
         id_chunks.append(msgid + b'\x00')
         str_chunks.append(msgstr + b'\x00')
         id_pos += len(msgid) + 1
         str_pos += len(msgstr) + 1

     ids = b''.join(id_chunks)
     strs = b''.join(str_chunks)

     # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
     # the keys start right after the index tables.
     keystart = 7 * 4 + 16 * len(messages)
     valuestart = keystart + len(ids)

     # The string table first has the list of keys, then the list of values.
     # Each entry has first the size of the string, then the file offset.
     koffsets = []
     voffsets = []
     for o1, l1, o2, l2 in offsets:
         koffsets += [l1, o1 + keystart]
         voffsets += [l2, o2 + valuestart]
     index = koffsets + voffsets

     # An explicit '<' selects little-endian byte order and standard 4-byte
     # fields, matching LE_MAGIC on every platform.
     header = struct.pack(
         '<7I',
         LE_MAGIC,                   # magic
         0,                          # version
         len(messages),              # number of entries
         7 * 4,                      # start of key index
         7 * 4 + len(messages) * 8,  # start of value index
         0, 0,                       # size and offset of hash table
     )
     fileobj.write(header + struct.pack(f'<{len(index)}I', *index) + ids + strs)
	"""
	babel.messages.mofile
	~~~~~~~~~~~~~~~~~~~~~

	Reading and writing of files in the ``gettext`` MO (machine object) format.

	:copyright: (c) 2013-2023 by the Babel Team.
	:license: BSD, see LICENSE for more details.
	"""
	from __future__ import annotations

	import array
	import struct
	from typing import TYPE_CHECKING

	from babel.messages.catalog import Catalog, Message

	if TYPE_CHECKING:
	from _typeshed import SupportsRead, SupportsWrite

	LE_MAGIC: int = 0x950412de
	BE_MAGIC: int = 0xde120495


	def read_mo(fileobj: SupportsRead[bytes]) -> Catalog:
	    """Read a binary MO file from the given file-like object and return a
	    corresponding `Catalog` object.

	    :param fileobj: the file-like object to read the MO file from; its whole
	                    content is read in one call and must be ``bytes``
	    :return: a new `Catalog` populated with the messages found in the file
	    :raises OSError: if the magic number is not recognised, or if the header,
	                     the index tables or a string offset point outside of
	                     the data that was read

	    :note: The implementation of this function is heavily based on the
	           ``GNUTranslations._parse`` method of the ``gettext`` module in the
	           standard library.
	    """
	    catalog = Catalog()
	    headers = {}

	    filename = getattr(fileobj, 'name', '')

	    buf = fileobj.read()
	    buflen = len(buf)
	    unpack_from = struct.unpack_from

	    # The first 32-bit word is the magic number; it tells us the byte order
	    # used for every other integer in the file.
	    try:
	        magic = unpack_from('<I', buf, 0)[0]
	    except struct.error as exc:
	        raise OSError(0, 'File is corrupt', filename) from exc

	    if magic == LE_MAGIC:
	        header_fmt, ii = '<4I', '<II'
	    elif magic == BE_MAGIC:
	        header_fmt, ii = '>4I', '>II'
	    else:
	        raise OSError(0, 'Bad magic number', filename)

	    # Next come four more words: format version (unused here), number of
	    # messages, and the offsets of the original/translation index tables.
	    try:
	        _version, msgcount, origidx, transidx = unpack_from(header_fmt, buf, 4)
	    except struct.error as exc:
	        raise OSError(0, 'File is corrupt', filename) from exc

	    # Now put all messages from the .mo file buffer into the catalog
	    # dictionary
	    for _i in range(msgcount):
	        # Each index entry is a (length, offset) pair.  A truncated index
	        # table is reported the same way as any other corruption.
	        try:
	            mlen, moff = unpack_from(ii, buf, origidx)
	            tlen, toff = unpack_from(ii, buf, transidx)
	        except struct.error as exc:
	            raise OSError(0, 'File is corrupt', filename) from exc
	        mend = moff + mlen
	        tend = toff + tlen
	        # Strict comparison: presumably this leaves room for the NUL byte
	        # that terminates every string in the file.
	        if not (mend < buflen and tend < buflen):
	            raise OSError(0, 'File is corrupt', filename)
	        msg = buf[moff:mend]
	        tmsg = buf[toff:tend]

	        # See if we're looking at GNU .mo conventions for metadata
	        if mlen == 0:
	            # Catalog description: "Key: value" lines; a line without a
	            # colon continues the value of the previous key.
	            lastkey = None
	            for item in tmsg.splitlines():
	                item = item.strip()
	                if not item:
	                    continue
	                if b':' in item:
	                    key, value = item.split(b':', 1)
	                    lastkey = key = key.strip().lower()
	                    headers[key] = value.strip()
	                elif lastkey:
	                    headers[lastkey] += b'\n' + item

	        if b'\x04' in msg:  # context
	            # Split on the first separator only, so that a stray EOT byte
	            # later in the id cannot break the two-way unpacking.
	            # NOTE(review): the context stays ``bytes`` while ids and
	            # strings are decoded below -- confirm that callers expect this.
	            ctxt, msg = msg.split(b'\x04', 1)
	        else:
	            ctxt = None

	        charset = catalog.charset
	        if b'\x00' in msg:  # plural forms
	            msg = msg.split(b'\x00')
	            tmsg = tmsg.split(b'\x00')
	            if charset:
	                msg = [x.decode(charset) for x in msg]
	                tmsg = [x.decode(charset) for x in tmsg]
	        elif charset:
	            msg = msg.decode(charset)
	            tmsg = tmsg.decode(charset)
	        catalog[msg] = Message(msg, tmsg, context=ctxt)

	        # advance to next entry in the seek tables
	        origidx += 8
	        transidx += 8

	    catalog.mime_headers = headers.items()
	    return catalog


	def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = False) -> None:
	    """Write a catalog to the specified file-like object using the GNU MO file
	    format.

	    The file is always written with little-endian, 32-bit unsigned fields,
	    independent of the byte order and ``int`` size of the running platform.

	    >>> from babel.messages import Catalog
	    >>> from gettext import GNUTranslations
	    >>> from io import BytesIO

	    >>> catalog = Catalog(locale='en_US')
	    >>> catalog.add('foo', 'Voh')
	    <Message ...>
	    >>> catalog.add(('bar', 'baz'), ('Bahr', 'Batz'))
	    <Message ...>
	    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
	    <Message ...>
	    >>> catalog.add('Fizz', '')
	    <Message ...>
	    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
	    <Message ...>
	    >>> buf = BytesIO()

	    >>> write_mo(buf, catalog)
	    >>> x = buf.seek(0)
	    >>> translations = GNUTranslations(fp=buf)
	    >>> translations.gettext('foo')
	    'Voh'
	    >>> translations.ngettext('bar', 'baz', 1)
	    'Bahr'
	    >>> translations.ngettext('bar', 'baz', 2)
	    'Batz'
	    >>> translations.gettext('fuz')
	    'fuz'
	    >>> translations.gettext('Fizz')
	    'Fizz'
	    >>> translations.gettext('Fuzz')
	    'Fuzz'
	    >>> translations.gettext('Fuzzes')
	    'Fuzzes'

	    :param fileobj: the file-like object to write to
	    :param catalog: the `Catalog` instance
	    :param use_fuzzy: whether translations marked as "fuzzy" should be included
	                      in the output
	    """
	    messages = list(catalog)
	    # The first entry is always kept; the rest are dropped when they have no
	    # translation or are fuzzy (unless fuzzy output was requested).
	    messages[1:] = [m for m in messages[1:]
	                    if m.string and (use_fuzzy or not m.fuzzy)]
	    messages.sort()

	    charset = catalog.charset
	    # Encoded, NUL-terminated ids/strings are collected in lists and joined
	    # once at the end instead of growing a bytes object entry by entry.
	    id_chunks = []
	    str_chunks = []
	    offsets = []
	    id_pos = str_pos = 0

	    for message in messages:
	        # For each string, we need size and file offset.  Each string is NUL
	        # terminated; the NUL does not count into the size.
	        if message.pluralizable:
	            msgid = b'\x00'.join(part.encode(charset) for part in message.id)
	            # An untranslated plural form falls back to the singular id for
	            # index 0 and to the plural id for every other index.
	            msgstr = b'\x00'.join(
	                (string or message.id[min(idx, 1)]).encode(charset)
	                for idx, string in enumerate(message.string)
	            )
	        else:
	            msgid = message.id.encode(charset)
	            msgstr = message.string.encode(charset)
	        if message.context:
	            msgid = b'\x04'.join([message.context.encode(charset), msgid])
	        offsets.append((id_pos, len(msgid), str_pos, len(msgstr)))
	        id_chunks.append(msgid + b'\x00')
	        str_chunks.append(msgstr + b'\x00')
	        id_pos += len(msgid) + 1
	        str_pos += len(msgstr) + 1

	    ids = b''.join(id_chunks)
	    strs = b''.join(str_chunks)

	    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
	    # the keys start right after the index tables.
	    keystart = 7 * 4 + 16 * len(messages)
	    valuestart = keystart + len(ids)

	    # The string table first has the list of keys, then the list of values.
	    # Each entry has first the size of the string, then the file offset.
	    koffsets = []
	    voffsets = []
	    for o1, l1, o2, l2 in offsets:
	        koffsets += [l1, o1 + keystart]
	        voffsets += [l2, o2 + valuestart]
	    index = koffsets + voffsets

	    # An explicit '<' selects little-endian byte order and standard 4-byte
	    # fields, matching LE_MAGIC on every platform.
	    header = struct.pack(
	        '<7I',
	        LE_MAGIC,                   # magic
	        0,                          # version
	        len(messages),              # number of entries
	        7 * 4,                      # start of key index
	        7 * 4 + len(messages) * 8,  # start of value index
	        0, 0,                       # size and offset of hash table
	    )
	    fileobj.write(header + struct.pack(f'<{len(index)}I', *index) + ids + strs)