need to fix "a.b-1" in parser

hoosierEE · Jun 17, 2024 · b05e266 · b05e266
1 parent b03ac13
commit b05e266
Show file tree

Hide file tree

Showing 8 changed files with 174 additions and 162 deletions.
diff --git a/README.org b/README.org
@@ -47,6 +47,7 @@ K is a vector language descended from APL, with hints of lisp.
 ** typical K features
 Most Ks have:
 - 1-dimensional vectors, nested lists, dictionaries
+- value semantics
 - an interpreter
 - eager evaluation
 - simple scoping rules (names are exclusively local or global)
@@ -80,20 +81,19 @@ Planned features:
 - lexical scope
 - closures
 - string data type (distinct from "list of char") as in [[https://codeberg.org/anaseto/goal/src/branch/master][goal]]
+- short-circuiting =and= and =or= keywords also from [[https://codeberg.org/anaseto/goal/src/branch/master][goal]]
 - =x:y= assign (immutable) variable
 - =x::y= assign (mutable) variable
 - No modified assignment operators. Use =a::a+1= instead of =a+:1=.
-  Meanwhile, verb-plus-colon still invokes the unary version of that verb, so =a-:1= is =a (- 1)=.
+  Meanwhile, verb-plus-colon still invokes the unary version of that verb, so =a-:b= is =a(-b)=.
+- No partial assignment to containers. If you assign the name =a= with =a::1 2 3=, then =a[0]::99= is invalid. Use =@= (amend) instead, and re-assign the whole container: =a::@[a;0;99]=. To improve performance, the implementation is free to reuse the original memory for =a= instead of making a copy.
 - Lists and function arguments evaluate left-to-right, top-to-bottom.
   For example, =f[a+b;a:1;b:2]= or =(a+b;a:1;b:2)= are valid in K but not in Element.
   In Element, =a;b= always parses as two separate expressions evaluated left to right regardless of being wrapped in any kind of parentheses.
   Alternatives: use =a:1;b:2;(a+b;a;b)= or =2_(a:1;b:2;a+b;a;b)=.
-# - Dictionary literals
-#   - ={a:2;b:5}= ⇔ ={`a:2;`b:5}= ⇔ =(`a;`b)!(2;5)= ⇔ =`a`b!2 5=
-#   - get: ={a:2}[`a]=
-#   - set: not supported for dict literals ={a:1}[`a]::2 /error=
-#   - set: have to bind (with =::=) to a name first: =d::{a:2;b:3}; d[`a]::40; d[`c]::42=
-#   - immutable dict can't be modified, even to add new keys: =i:{a:1}; i[`b]:3 /error=
+# - Dictionary literals are a sequence of assignments enclosed in =[]= similar to [[https://kparc.io/k/][Shakti]]: =[a:2+q;b:5]= ⇔ =[`a:2;`b:5]= ⇔ =(`a;`b)!(2;5)= ⇔ =`a`b!2 5=
+#   - =[a:1;42]= is not a dict literal because =42= is not an assignment
+#   -
 
 Aspirational features:
 - module system: =require x=, =provide x=

diff --git a/prototype/Ast.py b/prototype/Ast.py
@@ -1,6 +1,12 @@
-class Ast:#(node children*)
- def __init__(s,n,*c): s.node,s.children = s.t = n,c
+class Ast:#(node,*children)
+ def __init__(s,n,*c):
+  s.node = n
+  s.children = c
+  s.t = n,c
+ def __iter__(s): return iter(s.t)
  def __getitem__(s,i): return s.t[i]
  def __repr__(s):
-  n = dict(zip("{[(;/\\'",'lam prg lst seq fld scn ech'.split())).get(s.node,s.node)
-  return f'({n} {" ".join(map(repr,s.children))})' if s.children else str(n)
+  a,b=s;n=dict(zip("{[(;/\\'",'lam prg lst seq fld scn ech'.split())).get(a,a)
+  return f'({n} {" ".join(map(repr,s[1]))})' if b else str(n) # lisp-style
+  # return f'{n} ({", ".join(map(repr,s[1]))})' if b else str(n) # C-style
+  # return f'{" ".join(map(repr,s[1]))} {n}({len(s[1])})' if b else str(n) # rpn
diff --git a/prototype/Eval.py b/prototype/Eval.py
@@ -1,96 +1,26 @@
-# TODO: simplify (prototype only)
-# get rid of integers
-# don't save type info; instead compute it on-demand
-# use plain Python values instead of Val class
-from Ast import Ast
-from Builtin import ASSIGN,VERB
-from Semantic import lam_from_prj,formalize
+'''Eval.py - interpret a parsed expression'''
+Expr = float|list|str
 class Sym(str):pass
 class Name(str):pass
-class Num(float):pass
-Ty = dict(zip((tuple,int,float,Num,str,Name,Ast),
-              'l     i   f     d   s   n    a'.split()))
-# TG = {int:'d',float:'d',**Ty}#generic Num mapping
-Yt = {v:k for k,v in Ty.items()}
 
-class K:
- def __init__(s,v:str):
-  s.t: type   = tty(v) if type(v)==tuple else Ty[ty(v)]
-  s.v: object = tuple(map(Yt[s.t.lower()],v)) if s.t.isupper() else Yt[s.t](v)
+def ty1(x:str) -> type:
+ if x[0] in '`"': return (str,Sym)[x[0]=='`']
+ if x.replace('.','').replace('-','').isnumeric(): return float
+ return Name
 
- def __getitem__(s,k):
-  return s.v[k]
-
- def __repr__(s):
-  return f'{s.v}:{s.t}'
-
-
-def tty(xs:tuple) -> type:
- t = {ty(x) for x in xs}
- return Ty[t.pop()].upper() if len(t)==1 else 'L'
-
-def isnumeric(x:str) -> bool:
- return x.replace('.','').replace('-','').isnumeric()
-
-def ty(x:str) -> type:
+def infer_lit(x:Expr)->Expr:
+ '''infer types for literals'''
  match x:
-  case str():
-   if x[0] in '"`': return (str,Sym)[x[0]=='`']
-   if isnumeric(x): return float if '.' in x else int
-   return Name
-  case K(): return x.t
-  case _: return type(x)
-
-def v1(op,x): return f'{op}:{x}'
-def v2(op,x,y):
- match op,ty(x),ty(y):
-  case ['-',tuple(),tuple()]: return tuple(v2(op,a,b) for a,b in zip(x,y))
-  case ['-',tuple(),float()]: return tuple(a-float(y) for a in x)
-  case ['-',float(),tuple()]: return tuple(float(x)-b for b in y)
-  case ['-',float(),float()]: return x-y
-  case [',',tuple(),tuple()]: return (*x,*y)
-  case [',',tuple(),_]: return (*x,y)
-  case [',',_,tuple()]: return (x,*y)
-  case [',',*_]: return (x,y)
- raise RuntimeError(f'unrecognized use of ({op}) with args ({x}) and ({y})')
-
-def evl(x:Ast,e=None) -> object:
- e = e or {}
- x = formalize(lam_from_prj(x))
- return Eval(e,x)
+  case ('vec',*a): t = [ty1(i) for i in a]
+  case _: raise TypeError(f'unmatched pattern: {x}')
 
-def Eval(e:dict,x:Ast) -> object:
- if type(x)!=Ast: return x
- if not x.children:
-  r = Eval(e,x.node)
-  return e[r] if ty(r)==Name else r
-
- if x.node=='vec':
-  t = ty(x.children[0].node)
-  return tuple(t(c.node) for c in x.children)
-
- if x.node in ASSIGN:
-  n = x.children[0].node#name
-  r = Eval(e,x.children[1])#value
-  e[n] = r#FIXME: reference semantics make this update visible to caller; want value semantics
-  return r
-
- if x.node in [*'(;']:#list or sequence
-  ks = [Eval(e,i) for i in x.children]
-  return ks[-1] if x.node == ';' else ks#progn: last item only
-
- if x.node in ('{','cmp','prj'): return x#defer eval of lambda, cmp, prj until/if they are applied.
-
- if x.node in ('@','app'):#"app" always has 2 children  TODO: what about "@"?
-  b = Eval(e,x.children[0])#body  (...should it use {**e} instead of e?)
-  args = [Eval(e,xi) for xi in x.children[1].children] if x.children[1].node=='[' else [Eval(e,x.children[1])]
-  if type(b)==Ast and b.node=='{':
-   newenv = {a.node:v for a,v in zip(b.children[0].children,args)}
-   return Eval({**e,**newenv},b.children[1])
-  raise RuntimeError(f'nyi: {x}')
-
- if type(x.node)==str and x.node[0] in VERB:
-  k = [Eval(e,c) for c in x.children[::(-1,1)[x.node in '(;']]]
-  return (0,v1,v2)[len(x.children)](x.node,*k[::-1])
- else:
-  raise RuntimeError(f'ast not recognized: {x}')
+def ev(x:Expr)->Expr: return Eval({},infer_lit(x))
+def Eval(e:dict,x:Expr)->Expr:
+ match x:
+  case float()|('lam',_,_):return x
+  case str():              return e[x]
+  case ('-',a,b):          return Eval(e,a)-Eval(e,b) # TODO: all the other operators
+  case ('let',a,b,c):      return Eval({**e,a:Eval(e,b)},c)
+  case ('if',a,b,c):       return Eval(e,b) if Eval(e,a) else Eval(e,c)
+  case (('lam',a,b),*c):   return Eval({**e,**dict(zip(a,(Eval(e,i) for i in c)))},b)
+  case list():             return Eval(e,[Eval(e,i) for i in x])
diff --git a/prototype/Parser.py b/prototype/Parser.py
@@ -28,17 +28,17 @@ def reduce(until:str):#reduce until (until) matches
  def rt(x,arity):#(r)educe (t)op of stack based on x's arity
   k = [d.pop() for _ in range(min(len(d),arity))][::-1]
   if x in ENDEXP:
-   if   len(k)>1 and k[1].node==x: k = [k[0],*k[1].children]
-   elif len(k)>0 and k[0].node==x: k = [*k[0].children,*k[1:]]
+   if   len(k)>1 and k[1][0]==x: k = [k[0],*k[1][1]]
+   elif len(k)>0 and k[0][0]==x: k = [*k[0][1],*k[1:]]
   # if x in ANDOR:                   d.append(Ast(x,*k))
   if noun(x):                      d.append(Ast('app',Ast(x),*k))
   elif type(x)==Ast and k:         d.append(Ast('app',x,*k))
   else:                            d.append(Ast(x,*k))
   debug('rt',x,k)
 
  def rp(x:Op):#(r)educe (p)aren, e.g: reduce(OPAREN); rp(s.pop())
-  k = Ast(x.name,*(y.children if (y:=d.pop()).node==';' else (y,)))
-  if x.name=='(' and len(k.children)==1 and k.children[0]!=NIL: k = k.children[0]
+  k = Ast(x.name,*(y[1] if (y:=d.pop())[0]==';' else (y,)))
+  if x.name=='(' and len(k[1])==1 and k[1][0]!=NIL: k = k[1][0]
   if x.name=='[' and x.arity==2: k = Ast('app',d.pop(),k)
   d.append(k); debug('rp',x,k)
 
@@ -48,7 +48,7 @@ def rq(k:Ast):#juxtaposition-based syntax: projection and composition
    k = Ast('cmp',Ast('prj',Ast(x),d.pop()) if a==2 else Ast(x),k)
   d.append(k); debug('rq')
 
- def loop(i=0) -> int|None:#return error token index or None
+ def loop(i=0) -> int|None:#return index of error-causing token (if any), else None
   nn = lambda i:(t[i+1] if type(t[i+1])==str else 1) if i+1<z else ''
   while True:
    while True:#unary

diff --git a/prototype/README.org b/prototype/README.org
@@ -2,28 +2,29 @@
 This folder contains experiments used to guide Element development.
 Currently the focus is on an Element interpreter, written in Python to prioritize quick iteration over run-time performance.
 
+** Ast.py
+The =Ast= class defines an abstract syntax tree (AST) node and its pretty-printer.
+An Ast node is a (required) name and (optional) children.
+These are accessible by field names (=Ast.name= and =Ast.children=) or by tuple index (=x.name= is =x[0]=).
+This duality enables pattern matching Ast nodes as tuples.
+
 ** Scanner.py
-This breaks an input string into a list.
-Most list elements are strings, but some are lists of strings representing lists in the source.
+This module breaks an input string into a list of values, where each value is either a string or a tuple of strings.
+When the scanner encounters strand notation, it groups all the elements of the strand into a list of strings:
 - =2+3= ⇒ =['2', '+', '3']=
 - =1 2+3 4= ⇒ =[['1', '2'], '+', ['3', '4']]=
 - =2+`a`b``c= ⇒ =['2', '+', ['`a', '`b', '`', '`c']]=
-In particular, the scanner recognizes /stranding/ of certain kinds of values:
-- symbols like =`a= or =`=
-- strings like "hi world" or "x"
-- numbers like =1= or =3.1415=
-Numbers form strands when separated by spaces, while strings and symbols may strand together without spaces.
-Stranding two strings without spaces as in "hi""world" results in the list =["hi","world"]=.
-The /strand/ concept in array languages is simply a minimal syntax for creating 1-dimensional lists.
-Creating nested list literals requires parentheses as in =(1;(2;3);4)=.
-
-** Ast.py
-The =Ast= class defines an abstract syntax tree (AST) node and its pretty-printer.
-An =Ast= node has a name and zero or more additional nodes as its =children=.
+- ="a" "hello""""world"= ⇒ =['"a"', '"hello"', '""', '"world"']=
+Numbers separated by spaces form strands, but spaces are optional in symbol or string strands.
+A backtick (=`=) not followed by a letter or double-quote is equivalent to ~`""~.
+Stranding two strings without spaces as in "hi""world" results in the list =["hi", "world"]=.
+Strands are lightweight and optional syntax to create 1-dimensional lists.
+You can create a 1-d list without stranding with the "join" primitive function: =1,2= ⇒ =['1', '2']=, while nested lists always require parentheses as in =(1;(2;3);4)=.
 
 ** Parser.py
 This parser is a bottom-up style parser originally inspired by [[the Double-E method]].
 After a while I removed the operator precedence logic from this parser since K operators have uniform precedence.
+# FIXME: "a.b.c + 1" should be (+ (. a (. b c)) 1) but currently is (. a (. b (+ c 1)))
 A large portion of this parser now deals with juxtaposition, projection, and composition within the context of the usual application of values to operators, function calls, and parentheses.
 - juxtaposition (implied application)
   + =a b c= ⇒ =(a (b c))=
@@ -53,8 +54,20 @@ For example ="ab"/3= is an invalid expression in the language but parses as =((f
 The evaluator will handle this task.
 
 ** Eval.py
-TODO
+This section is TBD.
 
-** unit_test.py
+- Interesting paper: https://arxiv.org/pdf/1109.0781
+- Interesting statically typed compiled language: https://aardappel.github.io/lobster/README_FIRST.html
+
+** Test.py
 This file aims to include unit tests for the major parts of the prototype.
 Currently it has about 100 parser tests comparing a string input with expected Ast output.
+
+* GPU Support
+This section is TBD but here are some potentially useful links:
+- [[https://cupy.dev/][CuPy]]
+- [[https://github.com/NVIDIA/warp][Warp]]
+- [[https://docs.taichi-lang.org/][Taichi]]
+- [[https://numba.pydata.org/][Numba]]
+- [[https://docs.exaloop.io/codon][Codon]]
+- [[https://github.com/HazyResearch/ThunderKittens][ThunderKittens]]
diff --git a/prototype/Semantic.py b/prototype/Semantic.py
@@ -1,18 +1,33 @@
-'''
-These transformations should happen before Eval.
-e.g: formalize(lamp(Parse(Scan("x+1"))))
-
-        Parse                     formalize
-{1+x-3}   ⇒   (lam (+ 1 (- x 3)))     ⇒     (lam (prg x) (+ 1 (- x 3)))
-{x-y}     ⇒   (lam (+ x y))           ⇒     (lam (prg x y) (+ x y))
-{3}       ⇒   (lam 3)                 ⇒     (lam prg 3)
-
-   Parse           lamp
-3-   ⇒   (prj - 3)    ⇒     (lam (prg x) (- 3 x))
--    ⇒   (prj -)      ⇒     (lam (prg x y) (- x y))
-'''
 from Ast import Ast
 from Builtin import ASSIGN,VERB
+from dataclasses import dataclass
+
+type Sym = str
+type Name = str
+type Builtin = str
+
+def ty(x:str) -> type:
+ if x[0] in '`"': return (str,Sym)[x[0]=='`']
+ if x.replace('.','').replace('-','').isnumeric(): return float if '.' in x else int
+ if x=='NIL': return type(None)
+ if x in ASSIGN+VERB: return Builtin
+ return Name
+
+@dataclass
+class Val:
+ v:Ast
+ t:type
+ def __repr__(s): return f'{s.v}:{repr(s.t)[8:-2]}'
+
+def infer(a:Ast) -> Val:
+ '''infer types from literal expressions'''
+ match (a.node,a.children):
+  case ('vec',b): return Val(a,ty(b[0].node))
+  case ('{',p,b): return Ast(a.node,*map(infer,a.children))
+  case ('[',()): return Val(a.node,'NIL')
+  case (node,()): return Val(node,ty(node))
+  case (node,children): return Ast(a.node,*map(infer,children))
+  case _: raise SyntaxError(f'unmatched: {a}')
 
 def lam_from_prj(a:Ast) -> Ast:
  '''convert projection to lambda'''
@@ -40,3 +55,6 @@ def formalize(a:Ast) -> Ast:
    xyz = ''.join(x if x in xyz else '_' for x,_ in zip('xyz',range(ord(max(xyz))-ord('w'))))
   return Ast(a.node, Ast('[',*(map(Ast,filter(str,xyz)))), *map(formalize,a.children))#insert (prg x y z)
  return Ast(a.node, *map(formalize,a.children))
+
+def analyze(a:Ast) -> Ast|Val:
+ return infer(formalize(lam_from_prj(a)))
diff --git a/prototype/Test.py b/prototype/Test.py
@@ -76,6 +76,8 @@ def test_expr(scan,parse):
  ;b         ⇒ (seq NIL b)
  []         ⇒ (prg NIL)
  [x]        ⇒ (prg x)
+ a.b.c      ⇒ (. a (. b c))
+ a.b - 1    ⇒ (- (. a b) 1)
  a          ⇒ a
  a(b)       ⇒ (app a b)
  a++b       ⇒ (+ a (+ b))
@@ -196,10 +198,10 @@ def test_eval(scan,parse,evil):
   print(f'{"expected:":>{len(i)}}   {o}')
 
 
-def test(scan,parse,evil):
- print("test_expr:")
- test_expr(scan,parse)
- print("test_exception:")
- test_exception(scan,parse)
- print("test_eval:")
- test_eval(scan,parse,evil)
+# def test(scan,parse,evil):
+#  print("test_expr:")
+#  test_expr(scan,parse)
+#  print("test_exception:")
+#  test_exception(scan,parse)
+#  print("test_eval:")
+#  test_eval(scan,parse,evil)