Skip to content

Commit

Permalink
need to fix "a.b-1" in parser
Browse files Browse the repository at this point in the history
  • Loading branch information
hoosierEE committed Jun 17, 2024
1 parent b03ac13 commit b05e266
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 162 deletions.
14 changes: 7 additions & 7 deletions README.org
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ K is a vector language descended from APL, with hints of lisp.
** typical K features
Most Ks have:
- 1-dimensional vectors, nested lists, dictionaries
- value semantics
- an interpreter
- eager evaluation
- simple scoping rules (names are exclusively local or global)
Expand Down Expand Up @@ -80,20 +81,19 @@ Planned features:
- lexical scope
- closures
- string data type (distinct from "list of char") as in [[https://codeberg.org/anaseto/goal/src/branch/master][goal]]
- short-circuiting =and= and =or= keywords also from [[https://codeberg.org/anaseto/goal/src/branch/master][goal]]
- =x:y= assign (immutable) variable
- =x::y= assign (mutable) variable
- No modified assignment operators. Use =a::a+1= instead of =a+:1=.
Meanwhile, verb-plus-colon still invokes the unary version of that verb, so =a-:1= is =a (- 1)=.
Meanwhile, verb-plus-colon still invokes the unary version of that verb, so =a-:b= is =a(-b)=.
- No partial assignment to containers. If you assign the name =a= with =a::1 2 3=, then =a[0]::99= is invalid. Use =@= (amend) instead, and re-assign the whole container: =a::@[a;0;99]=. To improve performance, the implementation is free to reuse the original memory for =a= instead of making a copy.
- Lists and function arguments evaluate left-to-right, top-to-bottom.
For example, =f[a+b;a:1;b:2]= or =(a+b;a:1;b:2)= are valid in K but not in Element.
In Element, =a;b= always parses as two separate expressions evaluated left to right regardless of being wrapped in any kind of parentheses.
Alternatives: use =a:1;b:2;(a+b;a;b)= or =2_(a:1;b:2;a+b;a;b)=.
# - Dictionary literals
# - ={a:2;b:5}= ⇔ ={`a:2;`b:5}= ⇔ =(`a;`b)!(2;5)= ⇔ =`a`b!2 5=
# - get: ={a:2}[`a]=
# - set: not supported for dict literals ={a:1}[`a]::2 /error=
# - set: have to bind (with =::=) to a name first: =d::{a:2;b:3}; d[`a]::40; d[`c]::42=
# - immutable dict can't be modified, even to add new keys: =i:{a:1}; i[`b]:3 /error=
# - Dictionary literals are a sequence of assignments enclosed in =[]= similar to [[https://kparc.io/k/][Shakti]]: =[a:2+q;b:5]= ⇔ =[`a:2;`b:5]= ⇔ =(`a;`b)!(2;5)= ⇔ =`a`b!2 5=
# - =[a:1;42]= is not a dict literal because =42= is not an assignment
# -

Aspirational features:
- module system: =require x=, =provide x=
Expand Down
14 changes: 10 additions & 4 deletions prototype/Ast.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
class Ast:#(node children*)
def __init__(s,n,*c): s.node,s.children = s.t = n,c
class Ast:#(node,*children)
def __init__(s,n,*c):
s.node = n
s.children = c
s.t = n,c
def __iter__(s): return iter(s.t)
def __getitem__(s,i): return s.t[i]
def __repr__(s):
n = dict(zip("{[(;/\\'",'lam prg lst seq fld scn ech'.split())).get(s.node,s.node)
return f'({n} {" ".join(map(repr,s.children))})' if s.children else str(n)
a,b=s;n=dict(zip("{[(;/\\'",'lam prg lst seq fld scn ech'.split())).get(a,a)
return f'({n} {" ".join(map(repr,s[1]))})' if b else str(n) # lisp-style
# return f'{n} ({", ".join(map(repr,s[1]))})' if b else str(n) # C-style
# return f'{" ".join(map(repr,s[1]))} {n}({len(s[1])})' if b else str(n) # rpn
110 changes: 20 additions & 90 deletions prototype/Eval.py
Original file line number Diff line number Diff line change
@@ -1,96 +1,26 @@
# TODO: simplify (prototype only)
# get rid of integers
# don't save type info; instead compute it on-demand
# use plain Python values instead of Val class
from Ast import Ast
from Builtin import ASSIGN,VERB
from Semantic import lam_from_prj,formalize
'''Eval.py - interpret a parsed expression'''
Expr = float|list|str
class Sym(str):pass
class Name(str):pass
class Num(float):pass
Ty = dict(zip((tuple,int,float,Num,str,Name,Ast),
'l i f d s n a'.split()))
# TG = {int:'d',float:'d',**Ty}#generic Num mapping
Yt = {v:k for k,v in Ty.items()}

class K:
def __init__(s,v:str):
s.t: type = tty(v) if type(v)==tuple else Ty[ty(v)]
s.v: object = tuple(map(Yt[s.t.lower()],v)) if s.t.isupper() else Yt[s.t](v)
def ty1(x:str) -> type:
if x[0] in '`"': return (str,Sym)[x[0]=='`']
if x.replace('.','').replace('-','').isnumeric(): return float
return Name

def __getitem__(s,k):
return s.v[k]

def __repr__(s):
return f'{s.v}:{s.t}'


def tty(xs:tuple) -> type:
t = {ty(x) for x in xs}
return Ty[t.pop()].upper() if len(t)==1 else 'L'

def isnumeric(x:str) -> bool:
return x.replace('.','').replace('-','').isnumeric()

def ty(x:str) -> type:
def infer_lit(x:Expr)->Expr:
'''infer types for literals'''
match x:
case str():
if x[0] in '"`': return (str,Sym)[x[0]=='`']
if isnumeric(x): return float if '.' in x else int
return Name
case K(): return x.t
case _: return type(x)

def v1(op,x): return f'{op}:{x}'
def v2(op,x,y):
match op,ty(x),ty(y):
case ['-',tuple(),tuple()]: return tuple(v2(op,a,b) for a,b in zip(x,y))
case ['-',tuple(),float()]: return tuple(a-float(y) for a in x)
case ['-',float(),tuple()]: return tuple(float(x)-b for b in y)
case ['-',float(),float()]: return x-y
case [',',tuple(),tuple()]: return (*x,*y)
case [',',tuple(),_]: return (*x,y)
case [',',_,tuple()]: return (x,*y)
case [',',*_]: return (x,y)
raise RuntimeError(f'unrecognized use of ({op}) with args ({x}) and ({y})')

def evl(x:Ast,e=None) -> object:
e = e or {}
x = formalize(lam_from_prj(x))
return Eval(e,x)
case ('vec',*a): t = [ty1(i) for i in a]
case _: raise TypeError(f'unmatched pattern: {x}')

def Eval(e:dict,x:Ast) -> object:
if type(x)!=Ast: return x
if not x.children:
r = Eval(e,x.node)
return e[r] if ty(r)==Name else r

if x.node=='vec':
t = ty(x.children[0].node)
return tuple(t(c.node) for c in x.children)

if x.node in ASSIGN:
n = x.children[0].node#name
r = Eval(e,x.children[1])#value
e[n] = r#FIXME: reference semantics make this update visible to caller; want value semantics
return r

if x.node in [*'(;']:#list or sequence
ks = [Eval(e,i) for i in x.children]
return ks[-1] if x.node == ';' else ks#progn: last item only

if x.node in ('{','cmp','prj'): return x#defer eval of lambda, cmp, prj until/if they are applied.

if x.node in ('@','app'):#"app" always has 2 children TODO: what about "@"?
b = Eval(e,x.children[0])#body (...should it use {**e} instead of e?)
args = [Eval(e,xi) for xi in x.children[1].children] if x.children[1].node=='[' else [Eval(e,x.children[1])]
if type(b)==Ast and b.node=='{':
newenv = {a.node:v for a,v in zip(b.children[0].children,args)}
return Eval({**e,**newenv},b.children[1])
raise RuntimeError(f'nyi: {x}')

if type(x.node)==str and x.node[0] in VERB:
k = [Eval(e,c) for c in x.children[::(-1,1)[x.node in '(;']]]
return (0,v1,v2)[len(x.children)](x.node,*k[::-1])
else:
raise RuntimeError(f'ast not recognized: {x}')
def ev(x:Expr)->Expr: return Eval({},infer_lit(x))
def Eval(e:dict,x:Expr)->Expr:
match x:
case float()|('lam',_,_):return x
case str(): return e[x]
case ('-',a,b): return Eval(e,a)-Eval(e,b) # TODO: all the other operators
case ('let',a,b,c): return Eval({**e,a:Eval(e,b)},c)
case ('if',a,b,c): return Eval(e,b) if Eval(e,a) else Eval(e,c)
case (('lam',a,b),*c): return Eval({**e,**dict(zip(a,(Eval(e,i) for i in c)))},b)
case list(): return Eval(e,[Eval(e,i) for i in x])
10 changes: 5 additions & 5 deletions prototype/Parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,17 @@ def reduce(until:str):#reduce until (until) matches
def rt(x,arity):#(r)educe (t)op of stack based on x's arity
k = [d.pop() for _ in range(min(len(d),arity))][::-1]
if x in ENDEXP:
if len(k)>1 and k[1].node==x: k = [k[0],*k[1].children]
elif len(k)>0 and k[0].node==x: k = [*k[0].children,*k[1:]]
if len(k)>1 and k[1][0]==x: k = [k[0],*k[1][1]]
elif len(k)>0 and k[0][0]==x: k = [*k[0][1],*k[1:]]
# if x in ANDOR: d.append(Ast(x,*k))
if noun(x): d.append(Ast('app',Ast(x),*k))
elif type(x)==Ast and k: d.append(Ast('app',x,*k))
else: d.append(Ast(x,*k))
debug('rt',x,k)

def rp(x:Op):#(r)educe (p)aren, e.g: reduce(OPAREN); rp(s.pop())
k = Ast(x.name,*(y.children if (y:=d.pop()).node==';' else (y,)))
if x.name=='(' and len(k.children)==1 and k.children[0]!=NIL: k = k.children[0]
k = Ast(x.name,*(y[1] if (y:=d.pop())[0]==';' else (y,)))
if x.name=='(' and len(k[1])==1 and k[1][0]!=NIL: k = k[1][0]
if x.name=='[' and x.arity==2: k = Ast('app',d.pop(),k)
d.append(k); debug('rp',x,k)

Expand All @@ -48,7 +48,7 @@ def rq(k:Ast):#juxtaposition-based syntax: projection and composition
k = Ast('cmp',Ast('prj',Ast(x),d.pop()) if a==2 else Ast(x),k)
d.append(k); debug('rq')

def loop(i=0) -> int|None:#return error token index or None
def loop(i=0) -> int|None:#return index of error-causing token (if any), else None
nn = lambda i:(t[i+1] if type(t[i+1])==str else 1) if i+1<z else ''
while True:
while True:#unary
Expand Down
45 changes: 29 additions & 16 deletions prototype/README.org
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,29 @@
This folder contains experiments used to guide Element development.
Currently the focus is on an Element interpreter, written in Python to prioritize quick iteration over run-time performance.

** Ast.py
The =Ast= class defines an abstract syntax tree (AST) node and its pretty-printer.
An Ast node is a (required) name and (optional) children.
These are accessible by field names (=Ast.name= and =Ast.children=) or by tuple index (=x.name= is =x[0]=).
This duality enables pattern matching Ast nodes as tuples.

** Scanner.py
This breaks an input string into a list.
Most list elements are strings, but some are lists of strings representing lists in the source.
This module breaks an input string into a list of values, where each value is either a string or a tuple of strings.
When the scanner encounters strand notation, it groups all the elements of the strand into a list of strings:
- =2+3= ⇒ =['2', '+', '3']=
- =1 2+3 4= ⇒ =[['1', '2'], '+', ['3', '4']]=
- =2+`a`b``c= ⇒ =['2', '+', ['`a', '`b', '`', '`c']]=
In particular, the scanner recognizes /stranding/ of certain kinds of values:
- symbols like =`a= or =`=
- strings like "hi world" or "x"
- numbers like =1= or =3.1415=
Numbers form strands when separated by spaces, while strings and symbols may strand together without spaces.
Stranding two strings without spaces as in "hi""world" results in the list =["hi","world"]=.
The /strand/ concept in array languages is simply a minimal syntax for creating 1-dimensional lists.
Creating nested list literals requires parentheses as in =(1;(2;3);4)=.

** Ast.py
The =Ast= class defines an abstract syntax tree (AST) node and its pretty-printer.
An =Ast= node has a name and zero or more additional nodes as its =children=.
- ="a" "hello""""world"= ⇒ =['"a"', '"hello"', '""', '"world"']=
Numbers separated by spaces form strands, but spaces are optional in symbol or string strands.
A backtick (=`=) not followed by a letter or double-quote is equivalent to ~`""~.
Stranding two strings without spaces as in "hi""world" results in the list =["hi", "world"]=.
Strands are lightweight and optional syntax to create 1-dimensional lists.
You can create a 1-d list without stranding with the "join" primitive function: =1,2= ⇒ =['1', '2']=, while nested lists always require parentheses as in =(1;(2;3);4)=.

** Parser.py
This parser is a bottom-up style parser originally inspired by [[the Double-E method]].
After a while I removed the operator precedence logic from this parser since K operators have uniform precedence.
# FIXME: "a.b.c + 1" should be (+ (. a (. b c)) 1) but currently is (. a (. b (+ c 1)))
A large portion of this parser now deals with juxtaposition, projection, and composition within the context of the usual application of values to operators, function calls, and parentheses.
- juxtaposition (implied application)
+ =a b c= ⇒ =(a (b c))=
Expand Down Expand Up @@ -53,8 +54,20 @@ For example ="ab"/3= is an invalid expression in the language but parses as =((f
The evaluator will handle this task.

** Eval.py
TODO
This section is TBD.

** unit_test.py
- Interesting paper: https://arxiv.org/pdf/1109.0781
- Interesting statically typed compiled language: https://aardappel.github.io/lobster/README_FIRST.html

** Test.py
This file aims to include unit tests for the major parts of the prototype.
Currently it has about 100 parser tests comparing a string input with expected Ast output.

* GPU Support
This section is TBD but here are some potentially useful links:
- [[https://cupy.dev/][CuPy]]
- [[https://github.com/NVIDIA/warp][Warp]]
- [[https://docs.taichi-lang.org/][Taichi]]
- [[https://numba.pydata.org/][Numba]]
- [[https://docs.exaloop.io/codon][Codon]]
- [[https://github.com/HazyResearch/ThunderKittens][ThunderKittens]]
44 changes: 31 additions & 13 deletions prototype/Semantic.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
'''
These transformations should happen before Eval.
e.g: formalize(lamp(Parse(Scan("x+1"))))
Parse formalize
{1+x-3} ⇒ (lam (+ 1 (- x 3))) ⇒ (lam (prg x) (+ 1 (- x 3)))
{x-y} ⇒ (lam (+ x y)) ⇒ (lam (prg x y) (+ x y))
{3} ⇒ (lam 3) ⇒ (lam prg 3)
Parse lamp
3- ⇒ (prj - 3) ⇒ (lam (prg x) (- 3 x))
- ⇒ (prj -) ⇒ (lam (prg x y) (- x y))
'''
from Ast import Ast
from Builtin import ASSIGN,VERB
from dataclasses import dataclass

type Sym = str
type Name = str
type Builtin = str

def ty(x:str) -> type:
if x[0] in '`"': return (str,Sym)[x[0]=='`']
if x.replace('.','').replace('-','').isnumeric(): return float if '.' in x else int
if x=='NIL': return type(None)
if x in ASSIGN+VERB: return Builtin
return Name

@dataclass
class Val:
v:Ast
t:type
def __repr__(s): return f'{s.v}:{repr(s.t)[8:-2]}'

def infer(a:Ast) -> Val:
'''infer types from literal expressions'''
match (a.node,a.children):
case ('vec',b): return Val(a,ty(b[0].node))
case ('{',p,b): return Ast(a.node,*map(infer,a.children))
case ('[',()): return Val(a.node,'NIL')
case (node,()): return Val(node,ty(node))
case (node,children): return Ast(a.node,*map(infer,children))
case _: raise SyntaxError(f'unmatched: {a}')

def lam_from_prj(a:Ast) -> Ast:
'''convert projection to lambda'''
Expand Down Expand Up @@ -40,3 +55,6 @@ def formalize(a:Ast) -> Ast:
xyz = ''.join(x if x in xyz else '_' for x,_ in zip('xyz',range(ord(max(xyz))-ord('w'))))
return Ast(a.node, Ast('[',*(map(Ast,filter(str,xyz)))), *map(formalize,a.children))#insert (prg x y z)
return Ast(a.node, *map(formalize,a.children))

def analyze(a:Ast) -> Ast|Val:
return infer(formalize(lam_from_prj(a)))
16 changes: 9 additions & 7 deletions prototype/Test.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def test_expr(scan,parse):
;b ⇒ (seq NIL b)
[] ⇒ (prg NIL)
[x] ⇒ (prg x)
a.b.c ⇒ (. a (. b c))
a.b - 1 ⇒ (- (. a b) 1)
a ⇒ a
a(b) ⇒ (app a b)
a++b ⇒ (+ a (+ b))
Expand Down Expand Up @@ -196,10 +198,10 @@ def test_eval(scan,parse,evil):
print(f'{"expected:":>{len(i)}} {o}')


def test(scan,parse,evil):
print("test_expr:")
test_expr(scan,parse)
print("test_exception:")
test_exception(scan,parse)
print("test_eval:")
test_eval(scan,parse,evil)
# def test(scan,parse,evil):
# print("test_expr:")
# test_expr(scan,parse)
# print("test_exception:")
# test_exception(scan,parse)
# print("test_eval:")
# test_eval(scan,parse,evil)
Loading

0 comments on commit b05e266

Please sign in to comment.