lang/py/avro/test/test_script.py - avro - Git at Google

 #!/usr/bin/env python

 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 # https://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 from __future__ import absolute_import, division, print_function

 import csv
 import io
 import json
 import sys
 import unittest
 from operator import itemgetter
 from os import remove
 from os.path import dirname, isfile, join
 from subprocess import check_call, check_output
 from tempfile import NamedTemporaryFile

 import avro.schema
 from avro.datafile import DataFileWriter
 from avro.io import DatumWriter

 # Python 2/3 compatibility shim: when the ``unicode`` builtin does not exist
 # (NameError), alias it to ``str`` so the fixtures below work unchanged.
 try:
     unicode
 except NameError:
     unicode = str

 # Number of fixture records; matches the number of rows in LOONIES below.
 NUM_RECORDS = 7


 # Avro schema (JSON text) for the fixture record: three string fields named
 # "first", "last" and "type".
 SCHEMA = '''
 {
     "namespace": "test.avro",
         "name": "LooneyTunes",
         "type": "record",
         "fields": [
             {"name": "first", "type": "string"},
             {"name": "last", "type": "string"},
             {"name": "type", "type": "string"}
         ]
 }
 '''

 # Fixture rows as (first, last, type) tuples; looney_records() turns each row
 # into a dict with those keys, in this order.
 LOONIES = (
     (unicode("daffy"), unicode("duck"), unicode("duck")),
     (unicode("bugs"), unicode("bunny"), unicode("bunny")),
     (unicode("tweety"), unicode(""), unicode("bird")),
     (unicode("road"), unicode("runner"), unicode("bird")),
     (unicode("wile"), unicode("e"), unicode("coyote")),
     (unicode("pepe"), unicode("le pew"), unicode("skunk")),
     (unicode("foghorn"), unicode("leghorn"), unicode("rooster")),
 )


 def looney_records():
     """Yield one dict per LOONIES row with keys "first", "last" and "type"."""
     for first_name, last_name, kind in LOONIES:
         record = {"first": first_name, "last": last_name, "type": kind}
         yield record


 # Path of the ``avro`` command-line script under test: "scripts/avro" relative
 # to the directory two levels above the one that contains this file.
 SCRIPT = join(dirname(dirname(dirname(__file__))), "scripts", "avro")

 # Expected text for a single pretty-printed record; TestCat.test_json_pretty
 # compares it (after stripping) with the "--format json-pretty -n 1" output.
 _JSON_PRETTY = '''{
     "first": "daffy",
     "last": "duck",
     "type": "duck"
 }'''


 def gen_avro(filename):
     """Write every record from looney_records() to an Avro data file.

     Args:
         filename: Path of the file to write; it is opened in "wb" mode, so
             any existing content is replaced.
     """
     schema = avro.schema.parse(SCHEMA)
     # The ``with`` block guarantees the file handle is released even when
     # creating the writer or appending a record raises.
     with open(filename, "wb") as fo:
         writer = DataFileWriter(fo, DatumWriter(), schema)
         for record in looney_records():
             writer.append(record)
         writer.close()


 def tempfile():
     """Create an empty temporary file and return its path.

     The file is created with ``delete=False``, so it stays on disk after this
     function returns; callers are responsible for removing it.

     Returns:
         The path of the newly created file.
     """
     handle = NamedTemporaryFile(delete=False)
     # Close explicitly instead of leaving the open handle to the garbage
     # collector; the file itself is kept because of delete=False.
     handle.close()
     return handle.name


 class TestCat(unittest.TestCase):
     """Tests for the ``cat`` sub-command of the avro script.

     Each test runs the script in a subprocess against a freshly generated
     Avro file holding the LOONIES fixture records.
     """

     def setUp(self):
         self.avro_file = tempfile()
         gen_avro(self.avro_file)

     def tearDown(self):
         if isfile(self.avro_file):
             remove(self.avro_file)

     def _run(self, *args, **kw):
         """Run ``cat`` on the fixture file and return its decoded stdout.

         Args:
             *args: Extra command-line arguments appended after the file name.
             **kw: Only ``raw`` is consulted; when truthy the output is
                 returned as one string, otherwise as a list of lines.

         Raises:
             subprocess.CalledProcessError: If the script exits non-zero.
         """
         cmd = [sys.executable, SCRIPT, "cat", self.avro_file] + list(args)
         out = check_output(cmd).decode()
         if kw.get("raw"):
             return out
         return out.splitlines()

     # The next three tests used to ``return`` the comparison, which unittest
     # ignores, so they could never fail; they now assert.
     def test_print(self):
         self.assertEqual(len(self._run()), NUM_RECORDS)

     def test_filter(self):
         self.assertEqual(len(self._run("--filter", "r['type']=='bird'")), 2)

     def test_skip(self):
         skip = 3
         self.assertEqual(len(self._run("--skip", str(skip))), NUM_RECORDS - skip)

     def test_csv(self):
         reader = csv.reader(io.StringIO(self._run("-f", "csv", raw=True)))
         self.assertEqual(len(list(reader)), NUM_RECORDS)

     def test_csv_header(self):
         expected = {"type": unicode("duck"), "last": unicode("duck"), "first": unicode("daffy")}
         out = self._run("-f", "csv", "--header", raw=True)
         reader = csv.DictReader(io.StringIO(out))
         self.assertEqual(next(reader), expected)

     def test_print_schema(self):
         out = self._run("--print-schema", raw=True)
         self.assertEqual(json.loads(out)["namespace"], "test.avro")

     def test_help(self):
         # Just see we have these
         self._run("-h")
         self._run("--help")

     def test_json_pretty(self):
         out = self._run("--format", "json-pretty", "-n", "1", raw=1)
         self.assertEqual(out.strip(), _JSON_PRETTY.strip())

     def test_version(self):
         check_output([sys.executable, SCRIPT, "cat", "--version"])

     def test_files(self):
         # _run already passes the fixture file once; naming it again means
         # the script reads it twice.
         out = self._run(self.avro_file)
         self.assertEqual(len(out), 2 * NUM_RECORDS)

     def test_fields(self):
         # One field selection (no comma)
         out = self._run('--fields', 'last')
         self.assertEqual(json.loads(out[0]), {'last': 'duck'})

         # Field selection (with comma and space)
         out = self._run('--fields', 'first, last')
         self.assertEqual(json.loads(out[0]),
                          {'first': unicode('daffy'), 'last': unicode('duck')})

         # Empty fields should get all
         out = self._run('--fields', '')
         self.assertEqual(json.loads(out[0]),
                          {'first': unicode('daffy'), 'last': unicode('duck'),
                           'type': unicode('duck')})

         # Non existing fields are ignored
         out = self._run('--fields', 'first,last,age')
         self.assertEqual(json.loads(out[0]),
                          {'first': unicode('daffy'), 'last': unicode('duck')})


 class TestWrite(unittest.TestCase):
     """Tests for the ``write`` sub-command of the avro script.

     setUp creates JSON, CSV and schema input files from the fixture data;
     each test runs the script in a subprocess and reads the result back with
     the ``cat`` sub-command.
     """

     def setUp(self):
         # Create the suffixed files directly. Appending a suffix to the path
         # of an already-created temp file would leave that file orphaned.
         with NamedTemporaryFile(mode="w", suffix=".json", delete=False) as fo:
             self.json_file = fo.name
             for record in looney_records():
                 json.dump(record, fo)
                 fo.write("\n")

         with NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as fo:
             self.csv_file = fo.name
             write = csv.writer(fo).writerow
             get = itemgetter("first", "last", "type")
             for record in looney_records():
                 write(get(record))

         with NamedTemporaryFile(mode="w", delete=False) as fo:
             self.schema_file = fo.name
             fo.write(SCHEMA)

     def tearDown(self):
         for filename in (self.csv_file, self.json_file, self.schema_file):
             try:
                 remove(filename)
             except OSError:
                 continue

     def _run(self, *args, **kw):
         """Run ``write --schema <schema_file> [args...]``.

         Args:
             *args: Extra command-line arguments for the script.
             **kw: Passed through to ``subprocess.check_call`` (the tests use
                 ``stdout`` and ``stdin``).

         Raises:
             subprocess.CalledProcessError: If the script exits non-zero.
         """
         cmd = [sys.executable, SCRIPT, "write", "--schema", self.schema_file] + list(args)
         check_call(cmd, **kw)

     def load_avro(self, filename):
         """Return the records of ``filename`` as parsed from ``cat`` output."""
         out = check_output([sys.executable, SCRIPT, "cat", filename]).decode()
         return [json.loads(line) for line in out.splitlines()]

     def test_version(self):
         check_call([sys.executable, SCRIPT, "write", "--version"])

     def format_check(self, format, filename):
         """Convert ``filename`` using input format ``format`` and verify it."""
         tmp = tempfile()
         try:
             with open(tmp, "wb") as fo:
                 self._run(filename, "-f", format, stdout=fo)

             records = self.load_avro(tmp)
             self.assertEqual(len(records), NUM_RECORDS)
             self.assertEqual(records[0]["first"], unicode("daffy"))
         finally:
             # Clean up even when an assertion above fails.
             if isfile(tmp):
                 remove(tmp)

     def test_write_json(self):
         self.format_check("json", self.json_file)

     def test_write_csv(self):
         self.format_check("csv", self.csv_file)

     def test_outfile(self):
         tmp = tempfile()
         # Start with no file at the "-o" path; only the name is reused.
         remove(tmp)
         try:
             self._run(self.json_file, "-o", tmp)
             self.assertEqual(len(self.load_avro(tmp)), NUM_RECORDS)
         finally:
             if isfile(tmp):
                 remove(tmp)

     def test_multi_file(self):
         tmp = tempfile()
         try:
             with open(tmp, 'wb') as o:
                 self._run(self.json_file, self.json_file, stdout=o)
             self.assertEqual(len(self.load_avro(tmp)), 2 * NUM_RECORDS)
         finally:
             if isfile(tmp):
                 remove(tmp)

     def test_stdin(self):
         tmp = tempfile()
         try:
             # Close the input handle deterministically once the script ends.
             with open(self.json_file, "rb") as info:
                 self._run("--input-type", "json", "-o", tmp, stdin=info)

             self.assertEqual(len(self.load_avro(tmp)), NUM_RECORDS)
         finally:
             if isfile(tmp):
                 remove(tmp)
	#!/usr/bin/env python

	##
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# https://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from __future__ import absolute_import, division, print_function

	import csv
	import io
	import json
	import sys
	import unittest
	from operator import itemgetter
	from os import remove
	from os.path import dirname, isfile, join
	from subprocess import check_call, check_output
	from tempfile import NamedTemporaryFile

	import avro.schema
	from avro.datafile import DataFileWriter
	from avro.io import DatumWriter

	# Python 2/3 compatibility shim: when the ``unicode`` builtin does not exist
	# (NameError), alias it to ``str`` so the fixtures below work unchanged.
	# The try/except bodies are indented again; flattened they do not parse.
	try:
	    unicode
	except NameError:
	    unicode = str

	# Number of fixture records; matches the number of rows in LOONIES below.
	NUM_RECORDS = 7


	# Avro schema (JSON text) for the fixture record: three string fields named
	# "first", "last" and "type".
	SCHEMA = '''
	{
	"namespace": "test.avro",
	"name": "LooneyTunes",
	"type": "record",
	"fields": [
	{"name": "first", "type": "string"},
	{"name": "last", "type": "string"},
	{"name": "type", "type": "string"}
	]
	}
	'''

	# Fixture rows as (first, last, type) tuples, in the order they are written.
	LOONIES = (
	    (unicode("daffy"), unicode("duck"), unicode("duck")),
	    (unicode("bugs"), unicode("bunny"), unicode("bunny")),
	    (unicode("tweety"), unicode(""), unicode("bird")),
	    (unicode("road"), unicode("runner"), unicode("bird")),
	    (unicode("wile"), unicode("e"), unicode("coyote")),
	    (unicode("pepe"), unicode("le pew"), unicode("skunk")),
	    (unicode("foghorn"), unicode("leghorn"), unicode("rooster")),
	)


	def looney_records():
	    """Yield one dict per LOONIES row with keys "first", "last" and "type".

	    The body indentation is restored here; with every line at the same
	    column the ``for`` loop and ``yield`` do not parse.
	    """
	    for first_name, last_name, kind in LOONIES:
	        yield {"first": first_name, "last": last_name, "type": kind}


	# Path of the ``avro`` command-line script under test: "scripts/avro" relative
	# to the directory two levels above the one that contains this file.
	SCRIPT = join(dirname(dirname(dirname(__file__))), "scripts", "avro")

	# Expected text for a single pretty-printed record; TestCat.test_json_pretty
	# compares it (after stripping) with the "--format json-pretty -n 1" output.
	_JSON_PRETTY = '''{
	"first": "daffy",
	"last": "duck",
	"type": "duck"
	}'''


	def gen_avro(filename):
	    """Write every record from looney_records() to an Avro data file.

	    Args:
	        filename: Path of the file to write; it is opened in "wb" mode, so
	            any existing content is replaced.
	    """
	    schema = avro.schema.parse(SCHEMA)
	    # The ``with`` block guarantees the file handle is released even when
	    # creating the writer or appending a record raises.
	    with open(filename, "wb") as fo:
	        writer = DataFileWriter(fo, DatumWriter(), schema)
	        for record in looney_records():
	            writer.append(record)
	        writer.close()


	def tempfile():
	    """Create an empty temporary file and return its path.

	    The file is created with ``delete=False``, so it stays on disk after this
	    function returns; callers are responsible for removing it.

	    Returns:
	        The path of the newly created file.
	    """
	    handle = NamedTemporaryFile(delete=False)
	    # Close explicitly instead of leaving the open handle to the garbage
	    # collector; the file itself is kept because of delete=False.
	    handle.close()
	    return handle.name


	class TestCat(unittest.TestCase):
	    """Tests for the ``cat`` sub-command of the avro script.

	    Each test runs the script in a subprocess against a freshly generated
	    Avro file holding the LOONIES fixture records.
	    """

	    def setUp(self):
	        self.avro_file = tempfile()
	        gen_avro(self.avro_file)

	    def tearDown(self):
	        if isfile(self.avro_file):
	            remove(self.avro_file)

	    def _run(self, *args, **kw):
	        """Run ``cat`` on the fixture file and return its decoded stdout.

	        The signature is variadic (``*args, **kw``) because the tests call
	        this with zero or more positional options and a ``raw`` keyword.

	        Args:
	            *args: Extra command-line arguments appended after the file name.
	            **kw: Only ``raw`` is consulted; when truthy the output is
	                returned as one string, otherwise as a list of lines.

	        Raises:
	            subprocess.CalledProcessError: If the script exits non-zero.
	        """
	        cmd = [sys.executable, SCRIPT, "cat", self.avro_file] + list(args)
	        out = check_output(cmd).decode()
	        if kw.get("raw"):
	            return out
	        return out.splitlines()

	    # The next three tests used to ``return`` the comparison, which unittest
	    # ignores, so they could never fail; they now assert.
	    def test_print(self):
	        self.assertEqual(len(self._run()), NUM_RECORDS)

	    def test_filter(self):
	        self.assertEqual(len(self._run("--filter", "r['type']=='bird'")), 2)

	    def test_skip(self):
	        skip = 3
	        self.assertEqual(len(self._run("--skip", str(skip))), NUM_RECORDS - skip)

	    def test_csv(self):
	        reader = csv.reader(io.StringIO(self._run("-f", "csv", raw=True)))
	        self.assertEqual(len(list(reader)), NUM_RECORDS)

	    def test_csv_header(self):
	        expected = {"type": unicode("duck"), "last": unicode("duck"), "first": unicode("daffy")}
	        out = self._run("-f", "csv", "--header", raw=True)
	        reader = csv.DictReader(io.StringIO(out))
	        self.assertEqual(next(reader), expected)

	    def test_print_schema(self):
	        out = self._run("--print-schema", raw=True)
	        self.assertEqual(json.loads(out)["namespace"], "test.avro")

	    def test_help(self):
	        # Just see we have these
	        self._run("-h")
	        self._run("--help")

	    def test_json_pretty(self):
	        out = self._run("--format", "json-pretty", "-n", "1", raw=1)
	        self.assertEqual(out.strip(), _JSON_PRETTY.strip())

	    def test_version(self):
	        check_output([sys.executable, SCRIPT, "cat", "--version"])

	    def test_files(self):
	        # _run already passes the fixture file once; naming it again means
	        # the script reads it twice.
	        out = self._run(self.avro_file)
	        self.assertEqual(len(out), 2 * NUM_RECORDS)

	    def test_fields(self):
	        # One field selection (no comma)
	        out = self._run('--fields', 'last')
	        self.assertEqual(json.loads(out[0]), {'last': 'duck'})

	        # Field selection (with comma and space)
	        out = self._run('--fields', 'first, last')
	        self.assertEqual(json.loads(out[0]),
	                         {'first': unicode('daffy'), 'last': unicode('duck')})

	        # Empty fields should get all
	        out = self._run('--fields', '')
	        self.assertEqual(json.loads(out[0]),
	                         {'first': unicode('daffy'), 'last': unicode('duck'),
	                          'type': unicode('duck')})

	        # Non existing fields are ignored
	        out = self._run('--fields', 'first,last,age')
	        self.assertEqual(json.loads(out[0]),
	                         {'first': unicode('daffy'), 'last': unicode('duck')})


	class TestWrite(unittest.TestCase):
	    """Tests for the ``write`` sub-command of the avro script.

	    setUp creates JSON, CSV and schema input files from the fixture data;
	    each test runs the script in a subprocess and reads the result back with
	    the ``cat`` sub-command.
	    """

	    def setUp(self):
	        # Create the suffixed files directly. Appending a suffix to the path
	        # of an already-created temp file would leave that file orphaned.
	        with NamedTemporaryFile(mode="w", suffix=".json", delete=False) as fo:
	            self.json_file = fo.name
	            for record in looney_records():
	                json.dump(record, fo)
	                fo.write("\n")

	        with NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as fo:
	            self.csv_file = fo.name
	            write = csv.writer(fo).writerow
	            get = itemgetter("first", "last", "type")
	            for record in looney_records():
	                write(get(record))

	        with NamedTemporaryFile(mode="w", delete=False) as fo:
	            self.schema_file = fo.name
	            fo.write(SCHEMA)

	    def tearDown(self):
	        for filename in (self.csv_file, self.json_file, self.schema_file):
	            try:
	                remove(filename)
	            except OSError:
	                continue

	    def _run(self, *args, **kw):
	        """Run ``write --schema <schema_file> [args...]``.

	        The signature is variadic (``*args, **kw``) because the tests pass
	        several positional options plus ``stdout``/``stdin`` keywords, and
	        the body forwards ``**kw``.

	        Args:
	            *args: Extra command-line arguments for the script.
	            **kw: Passed through to ``subprocess.check_call``.

	        Raises:
	            subprocess.CalledProcessError: If the script exits non-zero.
	        """
	        cmd = [sys.executable, SCRIPT, "write", "--schema", self.schema_file] + list(args)
	        check_call(cmd, **kw)

	    def load_avro(self, filename):
	        """Return the records of ``filename`` as parsed from ``cat`` output."""
	        out = check_output([sys.executable, SCRIPT, "cat", filename]).decode()
	        return [json.loads(line) for line in out.splitlines()]

	    def test_version(self):
	        check_call([sys.executable, SCRIPT, "write", "--version"])

	    def format_check(self, format, filename):
	        """Convert ``filename`` using input format ``format`` and verify it."""
	        tmp = tempfile()
	        try:
	            with open(tmp, "wb") as fo:
	                self._run(filename, "-f", format, stdout=fo)

	            records = self.load_avro(tmp)
	            self.assertEqual(len(records), NUM_RECORDS)
	            self.assertEqual(records[0]["first"], unicode("daffy"))
	        finally:
	            # Clean up even when an assertion above fails.
	            if isfile(tmp):
	                remove(tmp)

	    def test_write_json(self):
	        self.format_check("json", self.json_file)

	    def test_write_csv(self):
	        self.format_check("csv", self.csv_file)

	    def test_outfile(self):
	        tmp = tempfile()
	        # Start with no file at the "-o" path; only the name is reused.
	        remove(tmp)
	        try:
	            self._run(self.json_file, "-o", tmp)
	            self.assertEqual(len(self.load_avro(tmp)), NUM_RECORDS)
	        finally:
	            if isfile(tmp):
	                remove(tmp)

	    def test_multi_file(self):
	        tmp = tempfile()
	        try:
	            with open(tmp, 'wb') as o:
	                self._run(self.json_file, self.json_file, stdout=o)
	            self.assertEqual(len(self.load_avro(tmp)), 2 * NUM_RECORDS)
	        finally:
	            if isfile(tmp):
	                remove(tmp)

	    def test_stdin(self):
	        tmp = tempfile()
	        try:
	            # Close the input handle deterministically once the script ends.
	            with open(self.json_file, "rb") as info:
	                self._run("--input-type", "json", "-o", tmp, stdin=info)

	            self.assertEqual(len(self.load_avro(tmp)), NUM_RECORDS)
	        finally:
	            if isfile(tmp):
	                remove(tmp)