AVRO-2399: Use Wheels for Python Distribution (#766)
* AVRO-2399 Use Wheels for Python Distribution
diff --git a/doc/src/content/xdocs/gettingstartedpython.xml b/doc/src/content/xdocs/gettingstartedpython.xml
index 84fb838..f6216b1 100644
--- a/doc/src/content/xdocs/gettingstartedpython.xml
+++ b/doc/src/content/xdocs/gettingstartedpython.xml
@@ -49,33 +49,43 @@
</section>
<section id="download_install">
- <title>Download</title>
+ <title>Download and Install</title>
<p>
- For Python, the easiest way to get started is to install it from PyPI.
- Python's Avro API is available over <a href="https://pypi.org/project/avro/">PyPi</a>.
+ The easiest way to get started in Python is to install <a href="https://pypi.org/project/avro/">avro from PyPI</a>
+ using <a href="https://pip.pypa.io/en/stable/">pip</a>, the Python Package Installer.
</p>
<source>
$ python3 -m pip install avro
</source>
+ <p>Consider doing a local install or using a virtualenv to avoid permissions problems and interfering with system packages:</p>
+ <source>
+$ python3 -m pip install --user avro
+ </source>
+ <p>or</p>
+ <source>
+ $ python3 -m venv avro-venv
+ $ avro-venv/bin/pip install avro
+ </source>
<p>
The official releases of the Avro implementations for C, C++, C#, Java,
PHP, Python, and Ruby can be downloaded from the <a
href="https://avro.apache.org/releases.html">Apache Avro™
- Releases</a> page. This guide uses Avro &AvroVersion;, the latest
- version at the time of writing. Download and unzip
- <em>avro-&AvroVersion;.tar.gz</em>, and install via <code>python
- setup.py</code> (this will probably require root privileges). Ensure
- that you can <code>import avro</code> from a Python prompt.
+ Releases</a> page. This guide uses Avro &AvroVersion;, the latest
+ version at the time of writing. Download and install
+ <em>avro-&AvroVersion;-py2.py3-none-any.whl</em> or
+ <em>avro-&AvroVersion;.tar.gz</em> via
+ <code>python -m pip install avro-&AvroVersion;-py2.py3-none-any.whl</code>
+ or
+ <code>python -m pip install avro-&AvroVersion;.tar.gz</code>.
+ (As above, consider using a virtualenv or user-local install.)
</p>
+ <p>Check that you can import avro from a Python prompt.</p>
<source>
-$ tar xvf avro-&AvroVersion;.tar.gz
-$ cd avro-&AvroVersion;
-$ python3 setup.py install
-$ python3
->>> import avro # should not raise ImportError
+$ python3 -c 'import avro; print(avro.__version__)'
</source>
+ <p>The above should print &AvroVersion;. It should not raise an <code>ImportError</code>.</p>
<p>
- Alternatively, you may build the Avro Python library from source. From
+ Alternatively, you may build the Avro Python library from source. From
the root Avro directory, run the commands
</p>
<source>
diff --git a/lang/py/avro/test/txsample_http_client.py b/lang/py/avro/test/txsample_http_client.py
deleted file mode 100644
index 891d9de..0000000
--- a/lang/py/avro/test/txsample_http_client.py
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/usr/bin/env python3
-# -*- mode: python -*-
-# -*- coding: utf-8 -*-
-
-##
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import, division, print_function
-
-import sys
-
-import avro.errors
-from avro import protocol, txipc
-from twisted.internet import defer, reactor
-from twisted.python.util import println
-
-MAIL_PROTOCOL_JSON = """\
-{"namespace": "example.proto",
- "protocol": "Mail",
-
- "types": [
- {"name": "Message", "type": "record",
- "fields": [
- {"name": "to", "type": "string"},
- {"name": "from", "type": "string"},
- {"name": "body", "type": "string"}
- ]
- }
- ],
-
- "messages": {
- "send": {
- "request": [{"name": "message", "type": "Message"}],
- "response": "string"
- },
- "replay": {
- "request": [],
- "response": "string"
- }
- }
-}
-"""
-MAIL_PROTOCOL = protocol.parse(MAIL_PROTOCOL_JSON)
-SERVER_HOST = 'localhost'
-SERVER_PORT = 9090
-
-
-def make_requestor(server_host, server_port, protocol):
- client = txipc.TwistedHTTPTransceiver(SERVER_HOST, SERVER_PORT)
- return txipc.TwistedRequestor(protocol, client)
-
-
-if __name__ == '__main__':
- if len(sys.argv) not in [4, 5]:
- raise avro.errors.UsageError("Usage: <to> <from> <body> [<count>]")
-
- # client code - attach to the server and send a message
- # fill in the Message record
- message = dict()
- message['to'] = sys.argv[1]
- message['from'] = sys.argv[2]
- message['body'] = sys.argv[3]
-
- try:
- num_messages = int(sys.argv[4])
- except IndexError:
- num_messages = 1
-
- # build the parameters for the request
- params = {}
- params['message'] = message
-
- requests = []
- # send the requests and print the result
- for msg_count in range(num_messages):
- requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
- d = requestor.request('send', params)
- d.addCallback(lambda result: println("Result: " + result))
- requests.append(d)
- results = defer.gatherResults(requests)
-
- def replay_cb(result):
- print("Replay Result: " + result)
- reactor.stop()
-
- def replay(_):
- # try out a replay message
- requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
- d = requestor.request('replay', dict())
- d.addCallback(replay_cb)
-
- results.addCallback(replay)
- reactor.run()
diff --git a/lang/py/avro/test/txsample_http_server.py b/lang/py/avro/test/txsample_http_server.py
deleted file mode 100644
index 3535e1c..0000000
--- a/lang/py/avro/test/txsample_http_server.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python3
-# -*- mode: python -*-
-# -*- coding: utf-8 -*-
-
-##
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import, division, print_function
-
-from avro import ipc, protocol, txipc
-from twisted.internet import reactor
-from twisted.web import server
-
-MAIL_PROTOCOL_JSON = """\
-{"namespace": "example.proto",
- "protocol": "Mail",
-
- "types": [
- {"name": "Message", "type": "record",
- "fields": [
- {"name": "to", "type": "string"},
- {"name": "from", "type": "string"},
- {"name": "body", "type": "string"}
- ]
- }
- ],
-
- "messages": {
- "send": {
- "request": [{"name": "message", "type": "Message"}],
- "response": "string"
- },
- "replay": {
- "request": [],
- "response": "string"
- }
- }
-}
-"""
-MAIL_PROTOCOL = protocol.parse(MAIL_PROTOCOL_JSON)
-SERVER_ADDRESS = ('localhost', 9090)
-
-
-class MailResponder(ipc.Responder):
- def __init__(self):
- ipc.Responder.__init__(self, MAIL_PROTOCOL)
-
- def invoke(self, message, request):
- if message.name == 'send':
- request_content = request['message']
- response = "Sent message to %(to)s from %(from)s with body %(body)s" % \
- request_content
- return response
- elif message.name == 'replay':
- return 'replay'
-
-
-if __name__ == '__main__':
- root = server.Site(txipc.AvroResponderResource(MailResponder()))
- reactor.listenTCP(9090, root)
- reactor.run()
diff --git a/lang/py/avro/txipc.py b/lang/py/avro/txipc.py
deleted file mode 100644
index 83308ee..0000000
--- a/lang/py/avro/txipc.py
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/usr/bin/env python3
-# -*- mode: python -*-
-# -*- coding: utf-8 -*-
-
-##
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import, division, print_function
-
-import io
-
-from zope.interface import implements
-
-import avro.io
-from avro import ipc
-from twisted.internet.defer import Deferred, maybeDeferred
-from twisted.internet.protocol import Protocol
-from twisted.web import resource, server
-from twisted.web.client import Agent
-from twisted.web.http_headers import Headers
-from twisted.web.iweb import IBodyProducer
-
-
-class TwistedRequestor(ipc.BaseRequestor):
- """A Twisted-compatible requestor. Returns a Deferred that will fire with the
- returning value, instead of blocking until the request completes."""
-
- def _process_handshake(self, call_response, message_name, request_datum):
- # process the handshake and call response
- buffer_decoder = avro.io.BinaryDecoder(io.BytesIO(call_response))
- call_response_exists = self.read_handshake_response(buffer_decoder)
- if call_response_exists:
- return self.read_call_response(message_name, buffer_decoder)
- else:
- return self.request(message_name, request_datum)
-
- def issue_request(self, call_request, message_name, request_datum):
- d = self.transceiver.transceive(call_request)
- d.addCallback(self._process_handshake, message_name, request_datum)
- return d
-
-
-class RequestStreamingProducer(object):
- """A streaming producer for issuing requests with the Twisted.web Agent."""
- implements(IBodyProducer)
-
- paused = False
- stopped = False
- started = False
-
- def __init__(self, message):
- self._message = message
- self._length = len(message)
- # We need a buffer length header for every buffer and an additional
- # zero-length buffer as the message terminator
- self._length += (self._length / ipc.BUFFER_SIZE + 2) \
- * ipc.BUFFER_HEADER_LENGTH
- self._total_bytes_sent = 0
- self._deferred = Deferred()
-
- # read-only properties
- message = property(lambda self: self._message)
- length = property(lambda self: self._length)
- consumer = property(lambda self: self._consumer)
- deferred = property(lambda self: self._deferred)
-
- def _get_total_bytes_sent(self):
- return self._total_bytes_sent
-
- def _set_total_bytes_sent(self, bytes_sent):
- self._total_bytes_sent = bytes_sent
-
- total_bytes_sent = property(_get_total_bytes_sent, _set_total_bytes_sent)
-
- def startProducing(self, consumer):
- if self.started:
- return
-
- self.started = True
- self._consumer = consumer
- # Keep writing data to the consumer until we're finished,
- # paused (pauseProducing()) or stopped (stopProducing())
- while self.length - self.total_bytes_sent > 0 and \
- not self.paused and not self.stopped:
- self.write()
- # self.write will fire this deferred once it has written
- # the entire message to the consumer
- return self.deferred
-
- def resumeProducing(self):
- self.paused = False
- self.write(self)
-
- def pauseProducing(self):
- self.paused = True
-
- def stopProducing(self):
- self.stopped = True
-
- def write(self):
- if self.length - self.total_bytes_sent > ipc.BUFFER_SIZE:
- buffer_length = ipc.BUFFER_SIZE
- else:
- buffer_length = self.length - self.total_bytes_sent
- self.write_buffer(self.message[self.total_bytes_sent:
- (self.total_bytes_sent + buffer_length)])
- self.total_bytes_sent += buffer_length
- # Make sure we wrote the entire message
- if self.total_bytes_sent == self.length and not self.stopped:
- self.stopProducing()
- # A message is always terminated by a zero-length buffer.
- self.write_buffer_length(0)
- self.deferred.callback(None)
-
- def write_buffer(self, chunk):
- buffer_length = len(chunk)
- self.write_buffer_length(buffer_length)
- self.consumer.write(chunk)
-
- def write_buffer_length(self, n):
- self.consumer.write(ipc.BIG_ENDIAN_INT_STRUCT.pack(n))
-
-
-class AvroProtocol(Protocol):
-
- recvd = ''
- done = False
-
- def __init__(self, finished):
- self.finished = finished
- self.message = []
-
- def dataReceived(self, data):
- self.recvd = self.recvd + data
- while len(self.recvd) >= ipc.BUFFER_HEADER_LENGTH:
- buffer_length, = ipc.BIG_ENDIAN_INT_STRUCT.unpack(
- self.recvd[:ipc.BUFFER_HEADER_LENGTH])
- if buffer_length == 0:
- response = ''.join(self.message)
- self.done = True
- self.finished.callback(response)
- break
- if len(self.recvd) < buffer_length + ipc.BUFFER_HEADER_LENGTH:
- break
- buffer = self.recvd[ipc.BUFFER_HEADER_LENGTH:buffer_length + ipc.BUFFER_HEADER_LENGTH]
- self.recvd = self.recvd[buffer_length + ipc.BUFFER_HEADER_LENGTH:]
- self.message.append(buffer)
-
- def connectionLost(self, reason):
- if not self.done:
- self.finished.errback(ipc.ConnectionClosedException("Reader read 0 bytes."))
-
-
-class TwistedHTTPTransceiver(object):
- """This transceiver uses the Agent class present in Twisted.web >= 9.0
- for issuing requests to the remote endpoint."""
-
- def __init__(self, host, port, remote_name=None, reactor=None):
- self.url = "http://%s:%d/" % (host, port)
-
- if remote_name is None:
- # There's no easy way to get this peer's remote address
- # in Twisted so I use a random UUID to identify ourselves
- import uuid
- self.remote_name = uuid.uuid4()
-
- if reactor is None:
- from twisted.internet import reactor
- self.agent = Agent(reactor)
-
- def read_framed_message(self, response):
- finished = Deferred()
- response.deliverBody(AvroProtocol(finished))
- return finished
-
- def transceive(self, request):
- req_method = 'POST'
- req_headers = {
- 'Content-Type': ['avro/binary'],
- 'Accept-Encoding': ['identity'],
- }
-
- body_producer = RequestStreamingProducer(request)
- d = self.agent.request(
- req_method,
- self.url,
- headers=Headers(req_headers),
- bodyProducer=body_producer)
- return d.addCallback(self.read_framed_message)
-
-
-class AvroResponderResource(resource.Resource):
- """This Twisted.web resource can be placed anywhere in a URL hierarchy
- to provide an Avro endpoint. Different Avro protocols can be served
- by the same web server as long as they are in different resources in
- a URL hierarchy."""
- isLeaf = True
-
- def __init__(self, responder):
- resource.Resource.__init__(self)
- self.responder = responder
-
- def cb_render_POST(self, resp_body, request):
- request.setResponseCode(200)
- request.setHeader('Content-Type', 'avro/binary')
- resp_writer = ipc.FramedWriter(request)
- resp_writer.write_framed_message(resp_body)
- request.finish()
-
- def render_POST(self, request):
- # Unfortunately, Twisted.web doesn't support incoming
- # streamed input yet, the whole payload must be kept in-memory
- request.content.seek(0, 0)
- call_request_reader = ipc.FramedReader(request.content)
- call_request = call_request_reader.read_framed_message()
- d = maybeDeferred(self.responder.respond, call_request)
- d.addCallback(self.cb_render_POST, request)
- return server.NOT_DONE_YET
diff --git a/lang/py/build.sh b/lang/py/build.sh
index b8cc03b..0c4d1fc 100755
--- a/lang/py/build.sh
+++ b/lang/py/build.sh
@@ -36,7 +36,10 @@
}
dist() {
- ./setup.py dist
+ python3 setup.py sdist
+ python3 setup.py bdist_wheel
+ mkdir -p ../../dist/py
+ cp dist/*.{tar.gz,whl} ../../dist/py
}
interop-data-generate() {
diff --git a/lang/py/mypy.ini b/lang/py/mypy.ini
index c0628e2..72ef3ca 100644
--- a/lang/py/mypy.ini
+++ b/lang/py/mypy.ini
@@ -25,9 +25,3 @@
[mypy-zstandard]
ignore_missing_imports = True
-
-[mypy-zope.interface]
-ignore_missing_imports = True
-
-[mypy-twisted.*]
-ignore_missing_imports = True
diff --git a/lang/py/setup.cfg b/lang/py/setup.cfg
index de4657b..046026e 100644
--- a/lang/py/setup.cfg
+++ b/lang/py/setup.cfg
@@ -28,6 +28,7 @@
author = Apache Avro
author_email = dev@avro.apache.org
url = https://avro.apache.org/
+license_file = avro/LICENSE
license = Apache License 2.0
classifiers =
License :: OSI Approved :: Apache Software License
@@ -36,6 +37,9 @@
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
+[bdist_wheel]
+universal = 1
+
[options]
packages =
avro
diff --git a/lang/py/setup.py b/lang/py/setup.py
index 66ae91b..42d8320 100755
--- a/lang/py/setup.py
+++ b/lang/py/setup.py
@@ -22,13 +22,12 @@
from __future__ import absolute_import, division, print_function
+import setuptools # type: ignore
import distutils.errors
import glob
import os
import subprocess
-import setuptools # type: ignore
-
_HERE = os.path.dirname(os.path.abspath(__file__))
_AVRO_DIR = os.path.join(_HERE, 'avro')
_VERSION_FILE_NAME = 'VERSION.txt'
diff --git a/lang/py/tox.ini b/lang/py/tox.ini
index 24f71a1..504d763 100644
--- a/lang/py/tox.ini
+++ b/lang/py/tox.ini
@@ -14,9 +14,9 @@
# limitations under the License.
[tox]
-# Remember to run tox --skip-missing-interpreters
-# If you don't want to install all these interpreters.
+# Run tox --skip-missing-interpreters if you don't want to install all these interpreters.
envlist =
+ build # Build the wheel
# Fastest checks first
lint
typechecks
@@ -36,6 +36,8 @@
.tox/*
[testenv]
+wheel = true
+wheel_build_env = build
deps =
coverage
python-snappy
@@ -54,13 +56,22 @@
coverage combine
coverage report
+[testenv:build]
+##
+# A virtual environment only used to build the wheel
+deps =
+whitelist_externals =
+commands_pre =
+commands =
+commands_post =
+
[testenv:lint]
deps =
isort
pycodestyle
commands_pre =
commands =
- isort --check-only
+ isort --check-only .
pycodestyle
commands_post =
@@ -74,8 +85,9 @@
mypy
[tool:isort]
+extra_standard_library = setuptools
+force_to_top = setuptools
line_length = 150
-known_third_party=zope
[pycodestyle]
exclude = .eggs,.tox,build
diff --git a/share/docker/Dockerfile b/share/docker/Dockerfile
index 6cca331..7035b17 100644
--- a/share/docker/Dockerfile
+++ b/share/docker/Dockerfile
@@ -140,7 +140,7 @@
# Install Python3 packages
RUN python3 -m pip install --upgrade pip setuptools wheel \
- && python3 -m pip install tox zstandard
+ && python3 -m pip install tox-wheel zstandard
# Install .NET SDK
RUN curl -sSLO https://packages.microsoft.com/config/ubuntu/16.04/packages-microsoft-prod.deb \