first commit

This commit is contained in:
2020-11-03 18:30:14 -08:00
commit 31d8522470
1881 changed files with 345408 additions and 0 deletions

View File

@@ -0,0 +1,141 @@
"""
The Proc sub is used to spin up worker processes that run hub referenced
coroutines.
"""
# Import python libs
import os
import sys
import atexit
import itertools
import asyncio
import shlex
import subprocess
# Import third party libs
import msgpack
import pop.hub
def __init__(hub: "pop.hub.Hub"):
    """
    Create constants used by the client and server side of procs
    """
    # Byte sequence that terminates every msgpack payload on the wire
    hub.proc.DELIM = b"d\xff\xcfCO)\xfe="
    # Flag byte appended before the delimiter to mark a final return
    hub.proc.D_FLAG = b"D"
    # Flag byte appended before the delimiter to mark an intermediate
    # (iterator) chunk
    hub.proc.I_FLAG = b"I"
    # Worker pools keyed by pool name
    hub.proc.Workers = {}
    # Round-robin iterator over each pool, keyed by pool name
    hub.proc.WorkersIter = {}
    # Per-pool bookkeeping: loaded subs, return socket ref, socket dir
    hub.proc.WorkersTrack = {}
def _get_cmd(hub: "pop.hub.Hub", ind, ref, ret_ref, sock_dir):
"""
Return the shell command to execute that will start up the worker
"""
code = "import sys; "
code += "import pop.hub; "
code += "hub = pop.hub.Hub(); "
code += 'hub.pop.sub.add("pop.mods.proc"); '
code += f'hub.proc.worker.start("{sock_dir}", "{ind}", "{ref}", "{ret_ref}")'
cmd = f"{sys.executable} -c '{code}'"
return cmd
def mk_proc(hub: "pop.hub.Hub", ind, workers, ret_ref, sock_dir):
    """
    Spin up a single worker process and register it in the passed-in
    workers dict under the given index
    """
    # Give each worker a unique socket filename
    sock_ref = os.urandom(3).hex() + ".sock"
    workers[ind] = {
        "ref": sock_ref,
        "path": os.path.join(sock_dir, sock_ref),
    }
    # Launch the worker via the shell and remember its handle and pid
    launch = _get_cmd(hub, ind, sock_ref, ret_ref, sock_dir)
    proc = subprocess.Popen(launch, shell=True)
    workers[ind]["proc"] = proc
    workers[ind]["pid"] = proc.pid
async def pool(
    hub: "pop.hub.Hub", num, name: str = "Workers", callback=None, sock_dir=None
):
    """
    Create a new local pool of process based workers

    :param num: The number of processes to add to this pool
    :param name: The key under `hub.proc.Workers` where this pool's
        worker dict is stored, defaults to `"Workers"`
    :param callback: The pop ref to call when a worker process communicates
        back
    :param sock_dir: The directory to create the unix sockets in (required)
    :raises ValueError: If sock_dir is not provided
    """
    if sock_dir is None:
        # os.path.join(None, ...) below would raise an opaque TypeError;
        # fail early with a clear message instead
        raise ValueError("sock_dir must be passed to hub.proc.init.pool")
    ret_ref = os.urandom(3).hex() + ".sock"
    ret_sock_path = os.path.join(sock_dir, ret_ref)
    if not hasattr(hub.proc, "Tracker"):
        # Register the atexit cleanup exactly once per hub
        hub.proc.init.mk_tracker()
    workers = {}
    if callback:
        # Serve returns coming back from the workers on a dedicated socket
        await asyncio.start_unix_server(
            hub.proc.init.ret_work(callback), path=ret_sock_path
        )
    for ind in range(num):
        hub.proc.init.mk_proc(ind, workers, ret_ref, sock_dir)
    w_iter = itertools.cycle(workers)
    hub.proc.Workers[name] = workers
    hub.proc.WorkersIter[name] = w_iter
    hub.proc.WorkersTrack[name] = {"subs": [], "ret_ref": ret_ref, "sock_dir": sock_dir}
    # Block until every worker has created its command socket and is ready
    up = set()
    while True:
        for ind in workers:
            if os.path.exists(workers[ind]["path"]):
                up.add(ind)
        if len(up) == num:
            break
        await asyncio.sleep(0.01)
    # TODO: This seems to be spawning extra procs, this should be fixed
    # asyncio.ensure_future(hub.proc.init.maintain(name))
async def maintain(hub: "pop.hub.Hub", name):
    """
    Keep an eye on these processes and restart any worker whose process
    has exited

    :param name: The name of the worker pool to monitor
    """
    workers = hub.proc.Workers[name]
    # mk_proc needs the return ref and socket dir recorded at pool creation
    ret_ref = hub.proc.WorkersTrack[name]["ret_ref"]
    sock_dir = hub.proc.WorkersTrack[name]["sock_dir"]
    while True:
        for ind, data in list(workers.items()):
            # Popen.poll() returns None while the process is still running
            # and the exit code once it has terminated - only restart the
            # workers that have actually died
            if data["proc"].poll() is not None:
                hub.proc.init.mk_proc(ind, workers, ret_ref, sock_dir)
        await asyncio.sleep(2)
def mk_tracker(hub: "pop.hub.Hub"):
    """
    Activate the process tracker: a simple marker on the hub plus an
    atexit hook so tracked worker processes are terminated when this
    process shuts down.
    """
    # Terminate all spawned workers when the interpreter exits
    atexit.register(hub.proc.init.clean)
    hub.proc.Tracker = True
def clean(hub: "pop.hub.Hub"):
    """
    Terminate every worker process registered in the tracker, across
    all pools
    """
    for workers in hub.proc.Workers.values():
        for data in workers.values():
            data["proc"].terminate()
def ret_work(hub: "pop.hub.Hub", callback):
    """
    Build the unix-socket connection handler that receives return
    payloads from workers and feeds them to the given callback
    """
    async def work(reader, writer):
        """
        Decode one delimited msgpack payload, hand it to the callback,
        and write the callback's return back to the worker
        """
        raw = await reader.readuntil(hub.proc.DELIM)
        payload = msgpack.loads(raw[: -len(hub.proc.DELIM)], raw=False)
        answer = await callback(payload)
        response = msgpack.dumps(answer, use_bin_type=True) + hub.proc.DELIM
        writer.write(response)
        await writer.drain()
        writer.close()
    return work

View File

@@ -0,0 +1,180 @@
"""
Execute functions or load subs on the workers in the named worker pool
"""
# import python libs
import asyncio
import os
# Import third party libs
import msgpack
import pop.hub
async def add_sub(hub: "pop.hub.Hub", worker_name, *args, **kwargs):
    """
    Tell all of the workers in the named pool to load the given sub.
    This function takes all of the same arguments as hub.pop.sub.add
    """
    ret = {}
    pool_workers = hub.proc.Workers[worker_name]
    # TODO: Send these as concurrent futures instead of serially
    for ind, worker in pool_workers.items():
        payload = {"fun": "sub", "args": args, "kwargs": kwargs}
        async for chunk in hub.proc.run.send(worker, payload):
            ret[ind] = chunk
    # Record the sub so workers added to the pool later load it as well
    hub.proc.WorkersTrack[worker_name]["subs"].append({"args": args, "kwargs": kwargs})
    return ret
async def add_proc(hub: "pop.hub.Hub", worker_name):
    """
    Add a single process to the named worker pool, wait for it to come
    up, and load onto it all of the subs already added to the pool

    :param worker_name: The name of the pool to grow
    :return: The index the new worker was registered under
    """
    # grab and extrapolate the data we need
    ret_ref = hub.proc.WorkersTrack[worker_name]["ret_ref"]
    sock_dir = hub.proc.WorkersTrack[worker_name]["sock_dir"]
    workers = hub.proc.Workers[worker_name]
    # Reuse the first free index if one exists, otherwise append at the end
    ind = len(workers)
    for s_ind in range(len(workers)):
        if s_ind not in workers:
            ind = s_ind
            break
    hub.proc.init.mk_proc(ind, workers, ret_ref, sock_dir)
    # Make sure the process is up with a live socket
    while True:
        if os.path.exists(workers[ind]["path"]):
            break
        await asyncio.sleep(0.01)
    # Add all of the subs that have been added to processes in this pool
    for sub in hub.proc.WorkersTrack[worker_name]["subs"]:
        payload = {"fun": "sub", "args": sub["args"], "kwargs": sub["kwargs"]}
        async for chunk in hub.proc.run.send(workers[ind], payload):
            pass
    return ind
async def pub(hub: "pop.hub.Hub", worker_name, func_ref, *args, **kwargs):
    """
    Execute the given function reference on ALL the workers in the given
    worker pool and return the return data from each.

    The sub providing the function must first be loaded into the workers
    via hub.proc.run.add_sub
    """
    ret = {}
    # TODO: Send these as concurrent futures instead of serially
    for ind, worker in hub.proc.Workers[worker_name].items():
        payload = {"fun": "run", "ref": func_ref, "args": args, "kwargs": kwargs}
        async for chunk in hub.proc.run.send(worker, payload):
            ret[ind] = chunk
    return ret
async def set_attr(hub: "pop.hub.Hub", worker_name, ref, value):
    """
    Set the given value at the given location on the hub of every
    worker process in the named pool
    """
    ret = {}
    # TODO: Send these as concurrent futures instead of serially
    for ind, worker in hub.proc.Workers[worker_name].items():
        payload = {"fun": "setattr", "ref": ref, "value": value}
        async for chunk in hub.proc.run.send(worker, payload):
            ret[ind] = chunk
    return ret
async def ind_func(hub: "pop.hub.Hub", worker_name, _ind, func_ref, *args, **kwargs):
    """
    Execute the function on the indexed process within the named worker
    pool and return its first (final) response
    """
    target = hub.proc.Workers[worker_name][_ind]
    payload = {"fun": "run", "ref": func_ref, "args": args, "kwargs": kwargs}
    async for ret in hub.proc.run.send(target, payload):
        return ret
async def func(hub: "pop.hub.Hub", worker_name, func_ref, *args, **kwargs):
    """
    Execute the given function reference on one worker in the given worker
    pool and return the return data.

    The sub providing the function must first be loaded into the workers
    via hub.proc.run.add_sub
    """
    _, coro = await hub.proc.run.track_func(worker_name, func_ref, *args, **kwargs)
    return await coro
async def track_func(hub: "pop.hub.Hub", worker_name, func_ref, *args, **kwargs):
    """
    Pick the next worker from the pool's rotation and return its index
    along with an un-awaited coroutine running the function on it
    """
    ind = next(hub.proc.WorkersIter[worker_name])
    return ind, hub.proc.run.ind_func(worker_name, ind, func_ref, *args, **kwargs)
async def gen(hub: "pop.hub.Hub", worker_name, func_ref, *args, **kwargs):
    """
    Execute a generator function reference on one worker within the given
    worker pool, yielding each chunk as it arrives.

    Like `func` the sub needs to be made available to all workers first
    """
    _, agen = await hub.proc.run.track_gen(worker_name, func_ref, *args, **kwargs)
    async for chunk in agen:
        yield chunk
async def track_gen(hub: "pop.hub.Hub", worker_name, func_ref, *args, **kwargs):
    """
    Pick the next worker from the pool's rotation and return its index
    along with the async iterator running the generator on it
    """
    ind = next(hub.proc.WorkersIter[worker_name])
    return ind, hub.proc.run.ind_gen(worker_name, ind, func_ref, *args, **kwargs)
async def ind_gen(hub: "pop.hub.Hub", worker_name, _ind, func_ref, *args, **kwargs):
    """
    Run the given generator reference on the worker at the given index,
    yielding each chunk it streams back
    """
    target = hub.proc.Workers[worker_name][_ind]
    payload = {"fun": "gen", "ref": func_ref, "args": args, "kwargs": kwargs}
    async for chunk in hub.proc.run.send(target, payload):
        yield chunk
async def send(hub: "pop.hub.Hub", worker, payload):
    """
    Send the given payload to the given worker, yield iterations based on
    the returns from the remote.

    :param worker: The worker dict holding the "path" of its unix socket
    :param payload: The msgpack-serializable dict to send
    """
    mp = msgpack.dumps(payload, use_bin_type=True)
    mp += hub.proc.DELIM
    reader, writer = await asyncio.open_unix_connection(path=worker["path"])
    try:
        writer.write(mp)
        await writer.drain()
        final_ret = True
        while True:
            ret = await reader.readuntil(hub.proc.DELIM)
            p_ret = ret[: -len(hub.proc.DELIM)]
            # The byte right before the delimiter is the flag: D marks the
            # final return, anything else is an intermediate iterator chunk
            i_flag = p_ret[-1:]
            ret = msgpack.loads(p_ret[:-1], raw=False)
            if i_flag == hub.proc.D_FLAG:
                # break for the end of the sequence
                break
            yield ret
            final_ret = False
        if final_ret:
            # No intermediate chunks arrived, so the D-flagged payload is
            # the one and only return
            yield ret
    finally:
        # Always release the connection - without this every call leaks
        # one open unix socket
        writer.close()

View File

@@ -0,0 +1,168 @@
"""
This module is used to manage the process started up by the pool. Work in this
module is used to manage the worker process itself and not other routines on
the hub this process was derived from
This is an exec, not a fork! This is a fresh memory space!
"""
# Import python libs
import os
import types
import asyncio
import pop.hub
# Import third party libs
import msgpack
# TODO: The workers should detect if their controlling process dies and terminate by themselves.
# The controlling process will kill them when it exits, but if it exits hard (crashes) then the
# workers should be able to also clean themselves up
def start(hub: "pop.hub.Hub", sock_dir, ind, ref, ret_ref):
    """
    This function is called by the startup script to create a worker process

    :NOTE: This is a new process started from the shell, it does not have any
    of the process namespace from the creating process.
    This is an EXEC, NOT a FORK!
    """
    # Record the identity of this worker on its fresh hub
    hub.proc.SOCK_DIR = sock_dir
    hub.proc.REF = ref
    hub.proc.RET_REF = ret_ref
    hub.proc.IND = ind
    # Full paths of the command socket and the return socket
    hub.proc.SOCK_PATH = os.path.join(sock_dir, ref)
    hub.proc.RET_SOCK_PATH = os.path.join(sock_dir, ret_ref)
    # Run the command server alongside the keep-alive coroutine
    hub.pop.loop.start(hub.proc.worker.hold(), hub.proc.worker.server())
async def hold(hub: "pop.hub.Hub"):
    """
    This function just holds the loop open by sleeping in a while loop
    """
    # Sleep forever in coarse intervals; this coroutine exists only to
    # keep the worker's event loop alive while the socket server runs
    while True:
        await asyncio.sleep(60)
async def server(hub: "pop.hub.Hub"):
    """
    Start the unix socket server to receive commands
    """
    # Each inbound connection is handled by hub.proc.worker.work
    await asyncio.start_unix_server(hub.proc.worker.work, path=hub.proc.SOCK_PATH)
async def work(hub: "pop.hub.Hub", reader, writer):
    """
    Process the incoming work

    Reads one delimiter-terminated msgpack payload from the connection,
    dispatches on the payload's "fun" key, and writes the D-flagged
    final return back to the caller.
    """
    inbound = await reader.readuntil(hub.proc.DELIM)
    inbound = inbound[: -len(hub.proc.DELIM)]
    # msgpack removed the `encoding` keyword in 1.0; pick the right call
    if msgpack.version < (1, 0, 0):
        payload = msgpack.loads(inbound, encoding="utf-8")
    else:
        payload = msgpack.loads(inbound)
    ret = b""
    if "fun" not in payload:
        ret = {"err": "Invalid format"}
    elif payload["fun"] == "sub":
        # Time to add a sub to the hub!
        try:
            hub.proc.worker.add_sub(payload)
            ret = {"status": True}
        except Exception as exc:
            # Report the failure back to the caller instead of crashing
            ret = {"status": False, "exc": str(exc)}
    elif payload["fun"] == "run":
        # Time to do some work!
        try:
            ret = await hub.proc.worker.run(payload)
        except Exception as exc:
            ret = {"status": False, "exc": str(exc)}
    elif payload["fun"] == "gen":
        # gen streams I-flagged chunks itself; ret is its final value
        ret = await hub.proc.worker.gen(payload, reader, writer)
    elif payload["fun"] == "setattr":
        ret = await hub.proc.worker.set_attr(payload)
    # Tag the response as the final return and terminate with the delimiter
    ret = msgpack.dumps(ret, use_bin_type=True)
    ret += hub.proc.D_FLAG
    ret += hub.proc.DELIM
    writer.write(ret)
    await writer.drain()
    writer.close()
def add_sub(hub: "pop.hub.Hub", payload):
    """
    Load the sub described by the payload onto this worker's hub
    """
    args = payload["args"]
    kwargs = payload["kwargs"]
    hub.pop.sub.add(*args, **kwargs)
async def gen(hub: "pop.hub.Hub", payload, reader, writer):
    """
    Run a generator and yield back the returns. Supports a generator and an
    async generator

    Each chunk is msgpack-serialized, tagged with the I flag and written
    directly to the caller's connection; the value returned here goes
    back through `work`, which tags it with the D flag.
    """
    ref = payload.get("ref")
    args = payload.get("args", [])
    kwargs = payload.get("kwargs", {})
    # Resolve the referenced callable on the hub and invoke it
    ret = hub.pop.ref.last(ref)(*args, **kwargs)
    if isinstance(ret, types.AsyncGeneratorType):
        async for chunk in ret:
            rchunk = msgpack.dumps(chunk, use_bin_type=True)
            rchunk += hub.proc.I_FLAG
            rchunk += hub.proc.DELIM
            writer.write(rchunk)
            await writer.drain()
    elif isinstance(ret, types.GeneratorType):
        for chunk in ret:
            rchunk = msgpack.dumps(chunk, use_bin_type=True)
            rchunk += hub.proc.I_FLAG
            rchunk += hub.proc.DELIM
            writer.write(rchunk)
            await writer.drain()
    elif asyncio.iscoroutine(ret):
        # A plain coroutine: its awaited value is the final return
        return await ret
    else:
        # A plain value: hand it back unchanged as the final return
        return ret
    # Both generator branches streamed everything above, so the final
    # D-flagged return is empty
    return ""
async def run(hub: "pop.hub.Hub", payload):
    """
    Resolve the referenced callable on the hub, invoke it with the
    payload's arguments, and return the result, awaiting it if the call
    produced a coroutine
    """
    target = hub.pop.ref.last(payload.get("ref"))
    ret = target(*payload.get("args", []), **payload.get("kwargs", {}))
    if asyncio.iscoroutine(ret):
        ret = await ret
    return ret
async def set_attr(hub: "pop.hub.Hub", payload):
    """
    Create the sent value at the named ref on this worker's hub
    """
    hub.pop.ref.create(payload.get("ref"), payload.get("value"))
async def ret(hub: "pop.hub.Hub", payload):
    """
    Send a return payload to the spawning process. This return will be tagged
    with the index of the process that returned it
    """
    # Wrap the payload so the receiver knows which worker sent it
    payload = {"ind": hub.proc.IND, "payload": payload}
    mp = msgpack.dumps(payload, use_bin_type=True)
    mp += hub.proc.DELIM
    reader, writer = await asyncio.open_unix_connection(path=hub.proc.RET_SOCK_PATH)
    writer.write(mp)
    await writer.drain()
    # Wait for the spawning process's response before closing
    ret = await reader.readuntil(hub.proc.DELIM)
    ret = ret[: -len(hub.proc.DELIM)]
    writer.close()
    # msgpack removed the `encoding` keyword in 1.0; pick the right call
    if msgpack.version < (1, 0, 0):
        return msgpack.loads(ret, encoding="utf-8")
    else:
        return msgpack.loads(ret)