Skip to content

dbx_patch.install_sitecustomize

[docs] module dbx_patch.install_sitecustomize

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
"""Install sitecustomize.py to Auto-Apply Patches on Startup.

This module provides functionality to install a sitecustomize.py file that
automatically applies all dbx-patch fixes when Python starts. This solves
the timing issue where sys_path_init and WsfsImportHook are loaded before
any notebook code runs.

Why sitecustomize.py?
- Python automatically imports sitecustomize.py during interpreter initialization
- It runs BEFORE sys_path_init.py and import hooks are installed
- This is the ONLY way to patch the import system early enough

Usage:
    from dbx_patch.install_sitecustomize import install_sitecustomize
    install_sitecustomize()
"""

from pathlib import Path
import sys

from dbx_patch.models import SitecustomizeStatus
from dbx_patch.utils.logger import PatchLogger

# Module-level logger instance shared by every function in this module.
logger = PatchLogger()


def get_site_packages_path() -> Path | None:
    """Get the first writable site-packages directory.

    Returns:
        Path to site-packages directory, or None if not found
    """
    for path_str in sys.path:
        if "site-packages" in path_str:
            path = Path(path_str)
            if path.exists() and path.is_dir():
                # Check if writable
                try:
                    test_file = path / ".dbx_patch_write_test"
                    test_file.touch()
                    test_file.unlink()
                    return path
                except (OSError, PermissionError):
                    continue
    return None


def get_sitecustomize_content() -> str:
    """Build the source text of the generated sitecustomize.py.

    The text is a fixed template: a module docstring, a ``_apply_dbx_patch``
    helper that imports and calls ``patch_dbx(force_refresh=False)`` while
    ignoring ``ImportError`` and printing any other exception to stderr, and
    a module-level call to that helper.

    Returns:
        Python code to be written to sitecustomize.py
    """
    # Single-quote delimiters let the embedded docstrings use plain triple
    # double quotes; the resulting text is unchanged.
    generated_source = '''"""Auto-apply dbx-patch on Python startup.

This file is automatically loaded by Python during interpreter initialization.
It applies all dbx-patch fixes BEFORE sys_path_init and import hooks are loaded.

Generated by: dbx-patch
DO NOT EDIT MANUALLY - Use dbx_patch.install_sitecustomize() to update
"""

import sys


def _apply_dbx_patch() -> None:
    """Apply dbx-patch fixes silently during startup."""
    try:
        # Import and apply all patches
        from dbx_patch.patch_dbx import patch_dbx

        # Apply silently (no output to avoid cluttering startup)
        patch_dbx(force_refresh=False)

    except ImportError:
        # dbx-patch not installed, skip silently
        pass
    except Exception as e:
        # Log error but don't break Python startup
        print(f"Warning: dbx-patch auto-apply failed: {e}", file=sys.stderr)


# Apply patches immediately on import
_apply_dbx_patch()
'''
    return generated_source


def _log_next_steps(restart_step: str) -> None:
    """Log the post-install checklist; only the wording of step 1 varies."""
    logger.info("Next steps:")
    with logger.indent():
        logger.info(restart_step)
        logger.info("2. Editable installs will work automatically")
        logger.info("3. No need to call patch_dbx() anymore!")


def install_sitecustomize(force: bool = True, restart_python: bool = True) -> bool:
    """Install sitecustomize.py to auto-apply patches on Python startup.

    This is the RECOMMENDED way to use dbx-patch because:
    1. Patches are applied BEFORE sys_path_init runs
    2. Import hooks are patched BEFORE they're installed
    3. No need to manually call patch_dbx() in every notebook
    4. Works automatically for all Python processes on the cluster

    An existing sitecustomize.py whose content differs from the generated one
    is moved to ``sitecustomize.py.backup`` before the new file is written.
    A file that is already identical to the generated content is not backed
    up, so re-running the installer does not replace an earlier backup with
    a copy of the generated file. If writing fails after a backup was made,
    the backup is moved back on a best-effort basis.

    Args:
        force: If True, overwrite existing sitecustomize.py
        restart_python: If True, automatically restart Python using dbutils.library.restartPython()

    Returns:
        True if installation succeeded (even when the automatic restart could
        not be performed), False otherwise

    Example:
        # Run once per cluster (e.g., in init script or first notebook):
        from dbx_patch.install_sitecustomize import install_sitecustomize
        install_sitecustomize()

        # Python will restart automatically if running in Databricks
        # After restart, editable installs will work automatically!
    """
    with logger.section("Installing sitecustomize.py for auto-apply"):
        # Find site-packages
        site_packages = get_site_packages_path()
        if site_packages is None:
            logger.error("Could not find writable site-packages directory")
            with logger.indent():
                logger.info("Make sure you have write permissions to site-packages")
            return False

        sitecustomize_path = site_packages / "sitecustomize.py"
        already_exists = sitecustomize_path.exists()

        # Refuse to touch an existing file unless the caller opted in
        if already_exists and not force:
            logger.warning(f"sitecustomize.py already exists: {sitecustomize_path}")
            with logger.indent():
                logger.info("Use force=True to overwrite")
                logger.info("Or manually merge the content")
            return False

        content = get_sitecustomize_content()
        backup_path = site_packages / "sitecustomize.py.backup"
        backed_up = False

        if already_exists:
            # A file identical to what is about to be written carries nothing
            # worth preserving; backing it up would only overwrite a backup of
            # the user's original file made by an earlier install.
            try:
                up_to_date = sitecustomize_path.read_text(encoding="utf-8") == content
            except (OSError, UnicodeDecodeError):
                # Unreadable or non-UTF-8: treat as foreign and back it up
                up_to_date = False

            if not up_to_date:
                logger.info(f"Backing up existing file to: {backup_path}")
                try:
                    sitecustomize_path.rename(backup_path)
                except OSError as e:
                    logger.error(f"Failed to backup existing file: {e}")  # noqa: TRY400
                    return False
                backed_up = True

        # Write new sitecustomize.py
        try:
            sitecustomize_path.write_text(content, encoding="utf-8")
        except OSError as e:
            logger.error(f"Failed to write sitecustomize.py: {e}")  # noqa: TRY400
            if backed_up:
                # Best effort: put the original file back so the environment
                # is not left without the sitecustomize.py it had before.
                try:
                    backup_path.replace(sitecustomize_path)
                    logger.info("Restored backup file")
                except OSError as restore_error:
                    logger.warning(f"Could not restore backup {backup_path}: {restore_error}")
            return False

        logger.success(f"sitecustomize.py installed: {sitecustomize_path}")
        logger.blank()
        logger.info("✅ Installation complete!")

        if not restart_python:
            logger.blank()
            _log_next_steps("1. Restart your Python kernel/notebook")
            return True

        # Try to restart Python automatically if in Databricks environment
        logger.blank()
        logger.info("Attempting to restart Python kernel...")
        try:
            logger.info("Restarting Python kernel via dbutils.library.restartPython()...")

            # NOTE(review): `dbutils` is looked up as a global/builtin of this
            # module and nothing in this file defines it -- confirm that the
            # runtime really exposes it to imported modules.
            dbutils.library.restartPython()  # ty:ignore[unresolved-reference]  # noqa: F821
        except NameError:
            # `dbutils` is not visible here: not in a Databricks environment
            logger.blank()
            logger.warning("Not running in Databricks environment")
            _log_next_steps("1. Restart your Python kernel/notebook manually")
        except Exception as e:  # noqa: BLE001
            # dbutils was found but the restart itself failed; say so instead
            # of claiming the environment is not Databricks
            logger.blank()
            logger.warning(f"Could not restart Python automatically: {e}")
            _log_next_steps("1. Restart your Python kernel/notebook manually")

        return True


def uninstall_sitecustomize() -> bool:
    """Remove the auto-apply sitecustomize.py.

    The file is only deleted when its text contains ``dbx-patch`` or
    ``dbx_patch``; any other file (including one that is not valid UTF-8) is
    left in place. After deletion, ``sitecustomize.py.backup`` is renamed
    back to ``sitecustomize.py`` if it exists.

    Returns:
        True if uninstallation succeeded (or there was nothing to remove),
        False otherwise
    """
    logger.debug("uninstall_sitecustomize() called")

    with logger.section("Uninstalling sitecustomize.py"):
        site_packages = get_site_packages_path()
        if site_packages is None:
            logger.error("Could not find site-packages directory")
            return False

        sitecustomize_path = site_packages / "sitecustomize.py"

        if not sitecustomize_path.exists():
            logger.info("sitecustomize.py does not exist, nothing to uninstall")
            return True

        # Check if it's our file
        try:
            content = sitecustomize_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # The generated file is written as UTF-8, so an undecodable file
            # is handled like any other foreign file below.
            content = ""
        except OSError as e:
            logger.error(f"Failed to read sitecustomize.py: {e}")  # noqa: TRY400
            return False

        if "dbx-patch" not in content and "dbx_patch" not in content:
            logger.warning("sitecustomize.py exists but wasn't created by dbx-patch")
            with logger.indent():
                logger.info("Skipping removal for safety")
                logger.info("Manual removal required if needed")
            return False

        # Remove the file
        try:
            sitecustomize_path.unlink()
        except OSError as e:
            logger.error(f"Failed to remove sitecustomize.py: {e}")  # noqa: TRY400
            return False
        logger.success("sitecustomize.py removed")

        # Restore backup if it exists; report a failure here as a restore
        # problem, since the removal itself has already succeeded.
        backup_path = site_packages / "sitecustomize.py.backup"
        if backup_path.exists():
            try:
                backup_path.rename(sitecustomize_path)
            except OSError as e:
                logger.error(f"Failed to restore backup file: {e}")  # noqa: TRY400
                return False
            logger.info("Restored backup file")

        return True


def check_sitecustomize_status() -> SitecustomizeStatus:
    """Check if sitecustomize.py is installed and active.

    Looks for ``sitecustomize.py`` in the directory returned by
    ``get_site_packages_path()`` and reports whether it exists and whether
    its text mentions ``dbx-patch`` or ``dbx_patch``. A file that cannot be
    read or decoded as UTF-8 is reported as installed but not created by
    dbx-patch.

    NOTE(review): the directory lookup only returns *writable* site-packages
    directories, so a sitecustomize.py in a read-only location is reported as
    not installed -- confirm this is intended.

    Returns:
        SitecustomizeStatus with installation information
    """
    logger.debug("check_sitecustomize_status() called")

    site_packages = get_site_packages_path()
    if site_packages is None:
        logger.warning("Could not find site-packages directory")
        return SitecustomizeStatus(
            installed=False,
            path=None,
            is_dbx_patch=False,
        )

    sitecustomize_path = site_packages / "sitecustomize.py"
    installed = sitecustomize_path.exists()
    is_dbx_patch = False

    if installed:
        try:
            content = sitecustomize_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            # Status reporting must not fail on an unreadable file; keep
            # is_dbx_patch False but leave a trace of why.
            logger.debug(f"Could not read {sitecustomize_path}: {e}")
        else:
            is_dbx_patch = "dbx-patch" in content or "dbx_patch" in content

    logger.info("sitecustomize.py status:")
    with logger.indent():
        logger.info(f"Installed: {installed}")
        logger.info(f"Path: {sitecustomize_path}")
        logger.info(f"Created by dbx-patch: {is_dbx_patch}")

    return SitecustomizeStatus(
        installed=installed,
        # A Path object is always truthy, so the path is always reported here
        path=str(sitecustomize_path),
        is_dbx_patch=is_dbx_patch,
    )