# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# In-place (perl -i) fixup for yasm compatibility: rewrites one-operand rotate
# instructions "ro[lr] <reg>" as "ro[lr] <reg>, 1".
# The operand must be the last token on the line and 2 to 4 word characters
# long, so four-character register names such as r10d are matched as well as
# al/ax/eax-style names.
# "$$" is Make's escape for a literal "$", i.e. the regex end-of-line anchor.
PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,4})$$/\1, 1/'

# External tools used by the recipes in this file.
C2GOASM := c2goasm
CC := clang

# Flags passed to every clang invocation. -masm=intel selects Intel-syntax
# assembly output and -I_lib puts the _lib directory on the include path.
C_FLAGS := -target x86_64-unknown-none -masm=intel \
           -mno-red-zone -mstackrealign \
           -mllvm -inline-threshold=1000 \
           -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti \
           -O3 -fno-builtin -ffast-math -fno-jump-tables \
           -I_lib

# Per-instruction-set flag sets, appended to C_FLAGS by the individual rules.
ASM_FLAGS_AVX2 := -mavx2 -mfma -mllvm -force-vector-width=32
ASM_FLAGS_SSE4 := -msse4
ASM_FLAGS_BMI2 := -mbmi2
ASM_FLAGS_POPCNT := -mpopcnt

# Source file lists, computed once at parse time. The _lib directory is pruned
# from the walk; the explicit -print keeps the pruned directory itself out of
# the result (without it, find's implicit -print also reports ./_lib).
# GO_SOURCES: every non-test Go file.
GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go' -print)
# ALL_SOURCES: non-test Go files plus assembly files. The two -name tests must
# be OR-ed inside a group; adjacent tests are AND-ed, which no file can satisfy.
ALL_SOURCES := $(shell find . -path ./_lib -prune -o \( -name '*.go' -o -name '*.s' \) -not -name '*_test.go' -print)

# "assembly" names a task, not a file, so declare it phony. The special target
# must be spelled .PHONY exactly; any other spelling is just an ordinary rule
# and has no effect.
.PHONY: assembly

# Delete a target whose recipe fails, so a partially generated or partially
# post-processed .s file is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Generated top-level assembly files, one per kernel and instruction set.
# Each is produced from the file of the same name under _lib/.
INTEL_SOURCES := \
    bit_packing_avx2.s \
    min_max_avx2.s \
    min_max_sse4.s \
    unpack_bool_avx2.s \
    unpack_bool_sse4.s

# Aggregate target: bring every generated assembly file up to date.
assembly: $(INTEL_SOURCES)

# Compile the bit-packing kernel with the AVX2 flag set, then patch the output
# in place: add the explicit rotate count, and rewrite memcpy/memset references
# to clib·_memcpy(SB) / clib·_memset(SB).
# Each step is its own recipe line so that a failing step aborts the rule
# instead of being masked by the exit status of a later command.
_lib/bit_packing_avx2.s: _lib/bit_packing_avx2.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $< -o $@
	$(PERL_FIXUP_ROTATE) $@
	perl -i -pe 's/mem(cpy|set)/clib·_mem\1(SB)/' $@

# Compile the min/max kernel with the AVX2 flag set, then apply the rotate
# fixup in place. Separate recipe lines make a compiler failure stop the rule
# before the fixup can touch a stale output file.
_lib/min_max_avx2.s: _lib/min_max.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $< -o $@
	$(PERL_FIXUP_ROTATE) $@

# Compile the min/max kernel with the SSE4 flag set, then apply the rotate
# fixup in place. Separate recipe lines make a compiler failure stop the rule
# before the fixup can touch a stale output file.
_lib/min_max_sse4.s: _lib/min_max.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $< -o $@
	$(PERL_FIXUP_ROTATE) $@

# Compile the bool-unpacking kernel with the AVX2 flag set, then apply the
# rotate fixup in place. Separate recipe lines make a compiler failure stop the
# rule before the fixup can touch a stale output file.
_lib/unpack_bool_avx2.s: _lib/unpack_bool.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $< -o $@
	$(PERL_FIXUP_ROTATE) $@

# Compile the bool-unpacking kernel with the SSE4 flag set, then apply the
# rotate fixup in place. Separate recipe lines make a compiler failure stop the
# rule before the fixup can touch a stale output file.
_lib/unpack_bool_sse4.s: _lib/unpack_bool.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $< -o $@
	$(PERL_FIXUP_ROTATE) $@

# Run c2goasm on each compiler-generated _lib/<name>.s to produce the
# top-level <name>.s. A single static pattern rule covers every file listed in
# INTEL_SOURCES, so a new entry in that list gets its conversion rule
# automatically, and a missing _lib/<name>.s is reported as an error.
$(INTEL_SOURCES): %.s: _lib/%.s
	$(C2GOASM) -a -f $< $@

# "clean" is a command rather than a file; declaring it phony keeps it working
# even if a file named "clean" exists in this directory.
.PHONY: clean

# Remove the generated top-level assembly files and the intermediate
# compiler output of the same names under _lib/.
clean:
	rm -f $(INTEL_SOURCES)
	rm -f $(addprefix _lib/,$(INTEL_SOURCES))