forked from tinygrad/tinygrad
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathops_clang.py
28 lines (24 loc) · 1.44 KB
/
ops_clang.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import ctypes, subprocess, pathlib, tempfile
from tinygrad.device import Compiled, Compiler, MallocAllocator
from tinygrad.helpers import cpu_time_execution, DEBUG, cpu_objdump
from tinygrad.renderer.cstyle import ClangRenderer
class ClangCompiler(Compiler):
def compile(self, src:str) -> bytes:
# TODO: remove file write. sadly clang doesn't like the use of /dev/stdout here
with tempfile.NamedTemporaryFile(delete=True) as output_file:
subprocess.check_output(['clang', '-shared', '-march=native', '-O2', '-Wall', '-Werror', '-x', 'c', '-fPIC', '-ffreestanding', '-nostdlib',
'-', '-o', str(output_file.name)], input=src.encode('utf-8'))
return pathlib.Path(output_file.name).read_bytes()
class ClangProgram:
def __init__(self, name:str, lib:bytes):
if DEBUG >= 6: cpu_objdump(lib)
self.name, self.lib = name, lib
# write to disk so we can load it
with tempfile.NamedTemporaryFile(delete=True) as cached_file_path:
pathlib.Path(cached_file_path.name).write_bytes(lib)
self.fxn = ctypes.CDLL(str(cached_file_path.name))[name]
def __call__(self, *bufs, vals=(), wait=False): return cpu_time_execution(lambda: self.fxn(*bufs, *vals), enable=wait)
class ClangDevice(Compiled):
def __init__(self, device:str):
from tinygrad.runtime.graph.clang import ClangGraph
super().__init__(device, MallocAllocator, ClangRenderer(), ClangCompiler("compile_clang"), ClangProgram, ClangGraph)