# blob: bde8b845fb8c56a41c0b7a7a2eeaeaae435ff1a1 [file] [log] [blame]
# -*- mode: python ; coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under this License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
from pathlib import Path
# Get project root directory
# SPECPATH is not defined in this file; it is expected to be injected by PyInstaller
# when it executes the spec.
# NOTE(review): SPECPATH is presumably the directory that contains this spec file, which
# would make project_root the directory one level ABOVE the spec - confirm that this is
# the intended root for `pathex` below.
project_root = Path(SPECPATH).parent
# Passed unchanged as the `cipher` argument to Analysis() and PYZ() below.
block_cipher = None
# Auto-collect all submodules of large dependency libraries
# Using collect_all automatically includes all dependencies and avoids manual maintenance of hiddenimports
from PyInstaller.utils.hooks import collect_all, collect_submodules, collect_data_files
# Collect data files, binaries and hidden imports for the large libraries listed below.
# Running collect_all on a package pulls in all of its submodules, which increases the
# bundle size and can slow down startup significantly.
# However, for libraries with many dynamic imports (e.g., torch, transformers, safetensors),
# collect_all is necessary to ensure all required modules are included.
# Smaller dependencies are only added as plain hidden imports further down, which is lighter-weight.
# Accumulators shared by every collection step in this spec; they are later passed to
# Analysis() as `datas`, `binaries` and `hiddenimports`.
all_datas = []
all_binaries = []
all_hiddenimports = []

# Libraries whose data files, binaries and hidden imports are gathered with collect_all.
# Only the keys are iterated below; the True values are not read anywhere in this file.
essential_libraries = {
    'torch': True,
    'transformers': True,
    'tokenizers': True,
    'huggingface_hub': True,
    'safetensors': True,
    'hf_xet': True,
    'numpy': True,
    'scipy': True,
    'pandas': True,
    'sklearn': True,
    'statsmodels': True,
    'sktime': True,
    'pmdarima': True,
    'hmmlearn': True,
    'accelerate': True
}

# Collect all libraries using collect_all (includes data files and binaries).
# Collection stays best-effort: a library that cannot be collected is skipped so the
# build continues, but a warning is printed so a missing dependency shows up in the
# build log instead of surfacing later as an import error in the frozen application.
for lib in essential_libraries:
    try:
        # Keep only the call that can fail inside the try body.
        lib_datas, lib_binaries, lib_hiddenimports = collect_all(lib)
    except Exception as e:
        print(f"Warning: Failed to collect {lib}: {e}")
    else:
        all_datas.extend(lib_datas)
        all_binaries.extend(lib_binaries)
        all_hiddenimports.extend(lib_hiddenimports)
# Second, more aggressive pass: explicitly gather EVERY submodule of the libraries that
# are known to rely on dynamic imports, so that none of them is missed.
# NOTE(review): collect_all above presumably already returns these submodules as hidden
# imports, in which case this pass mostly adds duplicates - confirm before removing.
libraries_with_dynamic_imports = [
    'scipy',  # Has many subpackages: stats, interpolate, optimize, linalg, sparse, signal, etc.
    'sklearn',  # Has many submodules that may be dynamically imported
    'transformers',  # Has dynamic model loading
    'torch',  # Has many submodules, especially _dynamo.polyfills
]

# Best-effort collection: a failure for one library is reported and the loop moves on.
for dynamic_lib in libraries_with_dynamic_imports:
    try:
        found_submodules = collect_submodules(dynamic_lib)
        all_hiddenimports += found_submodules
        print(f"Collected {len(found_submodules)} submodules from {dynamic_lib}")
    except Exception as exc:
        print(f"Warning: Failed to collect submodules from {dynamic_lib}: {exc}")
# Helper: submodule collection that degrades to a hand-written module list
def collect_submodules_with_fallback(package, fallback_modules=None, package_name=None):
    """
    Append every submodule of ``package`` to the module-level ``all_hiddenimports`` list.

    If collection raises, a warning is printed and, when ``fallback_modules`` is
    non-empty, those names are appended instead.

    Args:
        package: Package name to collect submodules from
        fallback_modules: Module names to add if collection fails (optional)
        package_name: Display name used in the printed messages (defaults to package)

    Returns:
        None. The function works by extending ``all_hiddenimports`` and printing.
    """
    label = package if package_name is None else package_name
    try:
        found = collect_submodules(package)
        all_hiddenimports.extend(found)
        print(f"Collected {len(found)} submodules from {label}")
    except Exception as exc:
        print(f"Warning: Failed to collect {label} submodules: {exc}")
        if not fallback_modules:
            # Nothing to fall back to: the warning above is all we can do.
            return
        all_hiddenimports.extend(fallback_modules)
        print(f"Using fallback modules for {label}")
# Sub-packages that get their own submodule collection on top of the library-wide passes.
# scipy, sklearn, transformers and torch were already handled through
# libraries_with_dynamic_imports; the entries here are narrower sub-packages that need
# special handling.

# Hand-maintained list used only when automatic collection of the polyfills package fails.
torch_polyfills_fallback = [
    'torch._dynamo.polyfills',
    'torch._dynamo.polyfills.functools',
    'torch._dynamo.polyfills.operator',
    'torch._dynamo.polyfills.collections',
]

# Each entry is (package_name, fallback_modules_list, display_name)
submodule_collection_configs = [
    # torch._dynamo.polyfills - critical for torch dynamo functionality
    # (torch is already collected above, but this ensures polyfills are included)
    ('torch._dynamo.polyfills', torch_polyfills_fallback, 'torch._dynamo.polyfills'),
    # transformers sub-packages with dynamic imports
    # (transformers is already collected above, but these may need extra attention)
    ('transformers.generation', None, 'transformers.generation'),
    ('transformers.models.auto', None, 'transformers.models.auto'),
]

# Run the helper once per configured entry, in list order.
for collection_config in submodule_collection_configs:
    collect_submodules_with_fallback(*collection_config)
# The project's own packages. Only top-level packages are listed because
# collect_submodules walks them recursively, which also avoids collecting twice.
project_packages = [
    'iotdb.ainode.core',  # This will include all sub-packages: manager, model, inference, etc.
    'iotdb.thrift',  # This will include all thrift sub-packages
]

# Collect each project package; if collection fails, the package name itself is
# registered as the fallback hidden import.
for project_package in project_packages:
    collect_submodules_with_fallback(
        project_package,
        fallback_modules=[project_package],
        package_name=project_package,
    )

# Parent packages are added explicitly to ensure they are included
parent_packages = ['iotdb', 'iotdb.ainode']
all_hiddenimports.extend(parent_packages)
# Workaround for the "partially initialized module" error seen with scipy.stats under
# PyInstaller: scipy.stats has circular imports, so the modules it depends on are
# listed explicitly as hidden imports.
# NOTE(review): hidden imports presumably only guarantee that these modules are bundled;
# confirm that this (rather than import order) is what resolves the error.
scipy_stats_critical_modules = [
    'scipy.stats._stats',  # Core stats module, must be imported first
    'scipy.stats._stats_py',  # Python implementation
    'scipy.stats._continuous_distns',  # Continuous distributions
    'scipy.stats._discrete_distns',  # Discrete distributions
    'scipy.stats.distributions',  # Distribution base classes
]
all_hiddenimports += scipy_stats_critical_modules
# Modules needed for multiprocessing inside a PyInstaller bundle.
# Both the POSIX and the win32 spawn back ends are listed, together with the
# torch.multiprocessing wrappers.
multiprocessing_modules = [
    'multiprocessing',
    'multiprocessing.spawn',
    'multiprocessing.popen_spawn_posix',
    'multiprocessing.popen_spawn_win32',
    'multiprocessing.popen_fork',
    'multiprocessing.popen_forkserver',
    'multiprocessing.context',
    'multiprocessing.reduction',
    'multiprocessing.util',
    'torch.multiprocessing',
    'torch.multiprocessing.spawn',
]

# External libraries that might use dynamic imports and therefore may need to be
# named explicitly.
external_dependencies = [
    'einops',
    'dynaconf',
    'tzlocal',
    'thrift',
    'psutil',
    'requests',
]

# Register both groups, multiprocessing modules first, then the external dependencies.
all_hiddenimports.extend(multiprocessing_modules + external_dependencies)
# Entry point of the application that PyInstaller analyzes.
entry_script = 'iotdb/ainode/core/script.py'

# Modules left out of the bundle to reduce size and improve startup time.
# Do not exclude unittest: torch and other libraries require it. Only modules that are
# definitely unused and not required by dependencies belong here.
excluded_modules = [
    'matplotlib',
    'IPython',
    'jupyter',
    'notebook',
    'pytest',
    'test',
    'tests'
]

# Analyze the main entry file.
# Do NOT add the virtual environment's site-packages to pathex manually: when PyInstaller
# is run from the virtual environment's Python it detects and uses that environment's
# site-packages automatically.
a = Analysis(
    [entry_script],
    pathex=[str(project_root)],
    binaries=all_binaries,
    datas=all_datas,
    hiddenimports=all_hiddenimports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=excluded_modules,
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    # Kept at False to avoid circular import issues with scipy.stats: with
    # noarchive=True modules are loaded as separate files, which can trigger those
    # issues, whereas the PYZ archive helps PyInstaller handle module loading order.
    noarchive=False,
)
# Name shared by the executable and by the output directory.
bundle_name = 'ainode'

# Bundle the pure-Python modules found by the analysis into the PYZ archive.
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

# Build the executable. exclude_binaries=True keeps the binaries out of the executable
# itself (onedir mode, chosen for faster startup); COLLECT below gathers them.
exe = EXE(
    pyz,
    a.scripts,
    [],
    name=bundle_name,
    exclude_binaries=True,
    console=True,
    debug=False,
    strip=False,
    upx=True,
    bootloader_ignore_signals=False,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)

# Gather the executable, binaries, zip files and data files into one directory (onedir mode).
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    name=bundle_name,
    strip=False,
    upx=True,
    upx_exclude=[],
)