| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
| import os |
| import platform |
| import shutil |
| |
| import unittest as ut |
| import numpy as np |
| import pandas as pd |
| |
| |
| import tsfile as ts |
| from tsfile.tsfile import EmptyFileError |
| |
| TABLE_NAME = "test_table" |
| DATA_PATH = os.path.join(os.path.dirname(__file__), "target") |
| |
| |
| # test writing data |
def test_write_tsfile():
    """Exercise ``ts.write_tsfile`` with invalid and valid DataFrames.

    Checks, in order:

    1. an empty DataFrame leaves no file behind;
    2. a DataFrame without a ``Time`` column raises ``AttributeError``;
    3. a float32 ``Time`` column raises ``TypeError``;
    4. a string column raises ``TypeError``;
    5. a DataFrame with int64, float32, bool and float64 columns is written
       to ``full_datatypes.tsfile`` (read back by ``test_read_tsfile``).
    """
    # Only the ``__main__`` runner creates DATA_PATH; create it here as well
    # so the test does not depend on being launched through that runner.
    os.makedirs(DATA_PATH, exist_ok=True)
    case = ut.TestCase()

    # Empty data: no file may be produced.
    empty_path = DATA_PATH + "/empty.tsfile"
    # test_read_tsfile creates a file with this exact name; drop any stale
    # copy so the existence check below only reflects write_tsfile itself.
    try:
        os.remove(empty_path)
    except FileNotFoundError:
        pass
    ts.write_tsfile(empty_path, TABLE_NAME, pd.DataFrame())
    assert not os.path.exists(empty_path)

    # 1000 rows of data without a Time column.
    level = np.linspace(2000, 3000, num=1000, dtype=np.float32)
    num = np.arange(10000, 11000, dtype=np.int64)
    df = pd.DataFrame({"level": level, "num": num})
    with case.assertRaises(AttributeError):
        ts.write_tsfile(DATA_PATH + "/no_time.tsfile", TABLE_NAME, df)

    # Time column with the wrong dtype (float32).
    timestamps = np.arange(1, 1001, dtype=np.float32)
    df = pd.DataFrame({"Time": timestamps, "level": level, "num": num})
    with case.assertRaises(TypeError):
        ts.write_tsfile(DATA_PATH + "/wrong_time_type.tsfile", TABLE_NAME, df)

    # Text columns are not supported yet.
    timestamps = np.arange(1, 1001, dtype=np.int64)
    text = np.random.choice(["a", "b", "c"], 1000)
    df = pd.DataFrame({"Time": timestamps, "text": text})
    with case.assertRaises(TypeError):
        ts.write_tsfile(DATA_PATH + "/txt.tsfile", TABLE_NAME, df)

    # Full datatypes test: one column per dtype exercised by the read test.
    timestamps = np.arange(1, 1001, dtype=np.int64)  # int64
    level = np.linspace(2000, 3000, num=1000, dtype=np.float32)  # float32
    num = np.arange(10000, 11000, dtype=np.int64)  # int64
    bools = np.random.choice([True, False], 1000)  # bool
    double = np.random.rand(1000)  # float64
    df = pd.DataFrame(
        {
            "Time": timestamps,
            "level": level,
            "num": num,
            "bools": bools,
            "double": double,
        }
    )
    ts.write_tsfile(DATA_PATH + "/full_datatypes.tsfile", TABLE_NAME, df)
| |
| |
| # test reading data |
def test_read_tsfile():
    """Exercise ``ts.read_tsfile`` against ``full_datatypes.tsfile``.

    Covers the error paths (missing file, zero-byte file) and then six read
    modes: full read, ``chunksize``, ``iterator``, a ``start_time``/``end_time``
    window, and that window combined with ``chunksize`` and with ``iterator``.
    The data file itself is produced by ``test_write_tsfile``.
    """
    # Skipped on Windows because of a bug in the tsfile library.
    if platform.system() == "Windows":
        return

    expect_raises = ut.TestCase().assertRaises

    # Reading a non-existent file.
    with expect_raises(FileNotFoundError):
        ts.read_tsfile(DATA_PATH + "/notexist.tsfile", TABLE_NAME, ["level", "num"])

    # Reading an empty file: create a zero-byte file first.
    with open(DATA_PATH + "/empty.tsfile", "w", encoding="utf-8"):
        pass

    with expect_raises(EmptyFileError):
        ts.read_tsfile(DATA_PATH + "/empty.tsfile", TABLE_NAME, ["level", "num"])

    source = DATA_PATH + "/full_datatypes.tsfile"

    # 1. Read all data: four requested columns plus Time.
    frame, _ = ts.read_tsfile(
        source, TABLE_NAME, ["level", "num", "bools", "double"]
    )
    assert frame.shape == (1000, 5)
    expected_dtypes = {
        "level": np.float32,
        "Time": np.int64,
        "num": np.int64,
        "bools": np.bool_,
        "double": np.float64,
    }
    for column, dtype in expected_dtypes.items():
        assert frame[column].dtype == dtype

    # 2. Read with chunksize: only the first 100 rows come back.
    frame, _ = ts.read_tsfile(source, TABLE_NAME, ["level", "num"], chunksize=100)
    assert frame.shape == (100, 3)
    assert frame["level"].dtype == np.float32
    assert frame["Time"].sum() == np.arange(1, 101).sum()

    # 3. Read with iterator: ten consecutive chunks of 100 rows each.
    chunks_seen = 0
    with ts.read_tsfile(
        source, TABLE_NAME, ["level", "num"], iterator=True, chunksize=100
    ) as reader:
        for index, (chunk, _) in enumerate(reader):
            first_time = 1 + index * 100
            assert chunk.shape == (100, 3)
            assert chunk["level"].dtype == np.float32
            assert (
                chunk["Time"].sum() == np.arange(first_time, first_time + 100).sum()
            )
            chunks_seen = index + 1
        assert chunks_seen == 10

    # 4. Read with a time range.
    frame, _ = ts.read_tsfile(
        source, TABLE_NAME, ["num"], start_time=50, end_time=99
    )
    assert frame.shape == (50, 2)
    assert frame["num"][0] == 10049
    assert frame["num"][9] == 10058

    # 5. Read with a time range and chunksize.
    frame, _ = ts.read_tsfile(
        source, TABLE_NAME, ["num"], start_time=50, end_time=99, chunksize=10
    )
    assert frame.shape == (10, 2)
    assert frame["num"][0] == 10049
    assert frame["num"][9] == 10058

    # 6. Read with a time range and iterator: five chunks of 10 rows each.
    chunks_seen = 0
    with ts.read_tsfile(
        source,
        TABLE_NAME,
        ["num"],
        start_time=50,
        end_time=99,
        iterator=True,
        chunksize=10,
    ) as reader:
        for index, (chunk, _) in enumerate(reader):
            offset = index * 10
            assert chunk.shape == (10, 2)
            assert chunk["num"][0] == 10049 + offset
            assert chunk["num"][9] == 10058 + offset
            chunks_seen = index + 1
        assert chunks_seen == 5
| |
| |
if __name__ == "__main__":
    # Start from a fresh data directory, discarding output of earlier runs.
    if os.path.exists(DATA_PATH):
        print("Remove old data")
        shutil.rmtree(DATA_PATH)
    os.makedirs(DATA_PATH)

    # The write test must run first: the read test consumes the file it writes.
    for run_test in (test_write_tsfile, test_read_tsfile):
        run_test()
    print("All tests passed")

    # Clean up the generated files once everything has passed.
    shutil.rmtree(DATA_PATH)