# Source code for runhouse.resources.functions.function_factory

import re
from pathlib import Path
from typing import Callable, List, Optional, Union

from runhouse.logger import get_logger

from runhouse.resources.envs import _get_env_from, Env
from runhouse.resources.functions.function import Function
from runhouse.resources.packages import git_package

logger = get_logger(__name__)


# Matches 'https://github.com/username/repo_name/blob/branch_name/path/to/file.py:func_name'
# and captures username, repo_name, branch_name, path/to/file.py, and func_name.
_GITHUB_FN_URL_PATTERN = re.compile(
    r"https://github\.com/(?P<username>[^/]+)/(?P<repo_name>[^/]+)/blob/"
    r"(?P<branch_name>[^/]+)/(?P<path>[^:]+):(?P<func_name>.+)"
)


def _parse_github_fn_url(fn: str) -> dict:
    """Split a GitHub function URL into its named parts, or raise ``ValueError`` if it doesn't match."""
    match = _GITHUB_FN_URL_PATTERN.match(fn)
    if not match:
        raise ValueError(
            "fn must be a callable or string of the form "
            '"https://github.com/username/repo_name/blob/branch_name/path/to/file.py:func_name"'
        )
    return match.groupdict()


def function(
    fn: Optional[Union[str, Callable]] = None,
    name: Optional[str] = None,
    env: Optional[Union[List[str], "Env", str]] = None,
    load_from_den: bool = True,
    dryrun: bool = False,
    load_secrets: bool = False,
    serialize_notebook_fn: bool = False,
) -> "Function":
    """runhouse.function(fn: str | Callable | None = None, name: str | None = None, env: str | List[str] | Env | None = None, load_from_den: bool = True, dryrun: bool = False, load_secrets: bool = False, serialize_notebook_fn: bool = False)

    Builds an instance of :class:`Function`.

    Args:
        fn (Optional[str or Callable]): The function to execute on the remote system when the function is called.
            Either a callable, or a string of the form
            ``https://github.com/username/repo_name/blob/branch_name/path/to/file.py:func_name``.
        name (Optional[str], optional): Name of the Function to create or retrieve.
            This can be either from a local config or from the RNS. (Default: ``None``)
        env (Optional[List[str] or Env or str], optional): List of requirements to install on the remote cluster,
            or path to the requirements.txt file, or Env object or string name of an Env object.
            Deprecated: a warning is logged whenever it is provided. (Default: ``None``)
        load_from_den (bool, optional): Whether to try loading the function from Den. (Default: ``True``)
        dryrun (bool, optional): Whether to create the Function if it doesn't exist, or load the Function
            object as a dryrun. (Default: ``False``)
        load_secrets (bool, optional): Whether or not to send secrets; only applicable if `dryrun` is set to
            ``False``. (Default: ``False``)
        serialize_notebook_fn (bool, optional): If function is of a notebook setting, whether or not to
            serialize the function. (Default: ``False``)

    Returns:
        Function: The resulting Function object.

    Raises:
        ValueError: If ``fn`` is a string that is not a GitHub URL of the form shown above.

    Example:
        >>> import runhouse as rh
        >>> cluster = rh.ondemand_cluster(name="my_cluster")
        >>> def sum(a, b):
        >>>    return a + b
        >>> summer = rh.function(fn=sum, name="my_func").to(cluster, env=['requirements.txt']).save()

        >>> # using the function
        >>> res = summer(5, 8)  # returns 13

        >>> # Load function from above
        >>> reloaded_function = rh.function(name="my_func")
    """  # noqa: E501
    if name and not any([fn, env]):
        # Only a name was given: try reloading an existing function.
        return Function.from_name(name, load_from_den=load_from_den, dryrun=dryrun)

    # Validate a URL-style ``fn`` before doing any other work, so malformed input
    # fails fast instead of after the env has already been resolved.
    github_parts = None
    if isinstance(fn, str) and not callable(fn):
        github_parts = _parse_github_fn_url(fn)

    if env:
        logger.warning(
            "The `env` argument is deprecated and will be removed in a future version. Please first "
            "construct your module and then do `module.to(system=system, env=env)` to set the environment. "
            "You can do `module.to(system=rh.here, env=env)` to set the environment on the local system."
        )

    if not isinstance(env, Env):
        env = _get_env_from(env) or Env()

    fn_pointers = None
    if callable(fn):
        fn_pointers = Function._extract_pointers(fn)
        if isinstance(env, Env):
            # Sometimes env may still be a string, in which case it won't be modified
            (
                local_path_containing_module,
                should_add,
            ) = Function._get_local_path_containing_module(fn_pointers[0], env.reqs)
            if should_add:
                env.reqs = [str(local_path_containing_module)] + env.reqs

        if fn_pointers[1] == "notebook":
            fn_pointers = Function._handle_nb_fn(
                fn,
                fn_pointers=fn_pointers,
                serialize_notebook_fn=serialize_notebook_fn,
                name=fn_pointers[2] or name,
            )
    elif github_parts is not None:
        username = github_parts["username"]
        repo_name = github_parts["repo_name"]
        branch_name = github_parts["branch_name"]
        file_path = Path(github_parts["path"])

        # Pointers are (path of the containing directory inside the repo, module name, function name).
        relative_path = str(Path(repo_name) / file_path.parent)
        fn_pointers = (relative_path, file_path.stem, github_parts["func_name"])

        # TODO [DG] check if the user already added this in their reqs
        repo_package = git_package(
            git_url=f"https://github.com/{username}/{repo_name}.git",
            revision=branch_name,
        )
        if isinstance(env, Env):
            env.reqs = [repo_package] + env.reqs
        else:
            # Same situation as the callable branch: env may still be an unresolved
            # string, which has no ``reqs`` to extend. Warn rather than crash.
            logger.warning(
                "Could not add the git package for the function's repo to the env requirements "
                "because `env` was not resolved to an Env object."
            )

    new_function = Function(fn_pointers=fn_pointers, name=name, dryrun=dryrun, env=env)

    if load_secrets and not dryrun:
        new_function.send_secrets()

    return new_function