| Bytes | Lang | Time | Link |
|---|---|---|---|
| 453 | tinylisp | 250203T193149Z | Mukundan |
| 251 | Python | 250202T050524Z | Mukundan |
| 1022 | tinylisp | 220513T114552Z | emanresu |
| 388 | Python 3 | 220203T050831Z | DialFros |
| 330 | Retina 0.8.2 | 220204T014701Z | Neil |
| 166 | Charcoal | 220205T114931Z | Neil |
| 629 | Python3 | 220203T025703Z | Ajax1234 |
tinylisp, 493 ... 462 453 bytes
-2 bytes thanks to @DLosc
-4 bytes thanks to @emanresu A by making the entrypoint an anonymous lambda
(load library
(d Z strcat
(d A(q((R F I)(insert(F(t R)I(h(max(t R))))(D(h R)I)1
(d B(q((L I M)(c(+(i M M 0)1)(c 40(insert-end 41(t(concat(h L)(i(t L)(chars(Z(i(l(h(h(t L)))3)spc I)(join(map string(map t(t L)))(i(l M 3)spc I))))(
(d C(q((L)(i(contains?(q(32()40 41))(h L))(c L())(insert(h L)(C(t L))1
(d D(q((L I)(i(e(h L)32)(D(t L)I)(i(e(h L)40)(A(D(t L)(Z I spc))B I)(i(-(h L)41)(A(C(c 1 L))(q(x(h x)))I)(c(t L)(
(q((S)(string(t(h(t(D(chars S)(Z nl spc
Because tinylisp doesn't have string literals, you'll need to pass your input through this python script (taken from @emanresu A's answer).
A port of my python answer. Doesn't use tail-call recursion in some places so fails for the larger inputs., While I have made more of the functions not optimizable with tail-call elimination, I have also made the method for adding indent more efficient so it is now able to run larger inputs.
Ungolfed
(load library)
(d # comment)
(def return (lambda (res el) (insert el res 1)))
(# given (unparsed-ls data...) (i.e. return from sym and expr) applies func on data, expr on remaining then uses return to join them)
(def return-func (lambda (res func indent)
(return
(expr (head res) indent)
(func (tail res) indent (head (max (tail res)))))))
(# formats a list given formatted child elements and max depth of children)
(def wrap (lambda (ls indent depth)
(cons (+ (i depth depth 0) 1) (cons 40 (insert-end 41
(tail (concat
(head ls)
(if (tail ls)
(chars (strcat
(if (less? (head (head (tail ls))) 3) spc indent)
(join (map string (map tail (tail ls))) (if (less? depth 3) spc indent))))
()))))))))
(# returns (unparsed-ls symbol))
(def sym (lambda (ls)
(if (contains? (list () 32 40 41) (head ls))
(list ls)
(return (sym (tail ls)) (head ls)))))
(# returns (unparsed-ls (depth ungolfed...) (depth ungolfed...) ...))
(def expr (lambda (ls indent)
(if (equal? (head ls) 32)
(expr (tail ls) indent)
(if (equal? (head ls) 40)
(return-func (expr (tail ls) (strcat indent spc)) wrap indent)
(if (- (head ls) 41)
(return-func (sym (cons 1 ls)) (q(x(h x))) indent)
(list (tail ls)))))))
(# entrypoint)
(lambda (str) (string (tail (head (tail (expr (chars str) (strcat nl spc)))))))
Explanation
The core of the algorithm is a recursive function which returns a list containing the ungolfed version of elements till the first unmatched ), which works as follows:
- if the first element is
:- remove first element and recursively call
- else if the first element is
(:- remove first element and recursively call with increased indent
- surround result with
(and) - recursively call for unparsed part
- concatenate and return
- else if at end of string or the first element is
):- return empty
- else:
- take elements till
(,),or end of string - recursively call for unparsed part
- concatenate and return
- take elements till
Python, 270 ... 255 251 bytes
- 8 bytes thanks to @DLosc by storing t.pop in a variable
def f(t,E=[],i="\n "):
p=t.pop;t[0]<"!"<p(0);r=""
while(t[0]in"() ")<1:r+=p(0)
if r:E+=1,;return r
t+=")";p(0);D,*R=[0],
while")"!=t[0]:R+=f(t,D,i+" "),
d=max(D);r,*R=R or[""];E+=d+1,;return"("+r+(R and i[-(D[2]<3):]or"")+i[-(d<3):].join(R)+p(0)
Commented Version
def f(t, E=[], i="\n "): # E = return variable for depth
# remove first element of t if it is ' '
t[0] < "!" < t.pop(0)
# read into r from t till '(', ')' or ' '
r = ""
while (t[0] in "() ") < 1:
r += t.pop(0)
# if r is not empty (i.e. r contains a symbol or integer) return depth=0+1, r
if r:
E += 1,
return r
# Remove first element of t (which must equal '(')
t.pop(0)
# add a extra ')' to end in case there isn't enough ')'
t += ")"
# D = list for storing depths returned by child elements
# R = list for storing ungolfed versions of child elements
D, *R = [0],
while ")" != t[0]: # while current expression isn't at its end
# add child element with recursive call
R += f(t, D, i + " "),
# depth of current expression is maximum of depth returns from elements
d = max(D)
# extract first element into r keep rest in R
r, *R = R or [""]
E += d + 1, # return (depth = d + 1)
return ( # return
"(" # "("
+ r # + first element
+ (R and i[-(D[2] < 3):] or "") # + ("" if only one element otherwise i or " " depending on depth of second element)
+ i[-(d < 3):].join(R) # + rest of R joined with i or " " depending on depth
+ t.pop(0) # + ")"
)
tinylisp, 1022 bytes
(load library
(d ? contains?
(d r reverse
(d O last
(d _(q((S T C)(i S(i(e(h S)32)(_(t S)(i C(c(r C)T)T)())(i(?(q(40 41))(h S))(_(t S)(c(h S)(i C(c(r C)T)T))())(_(t S)T(c(h S)C))))(r(i C(c(r C)T)T
(d @(q((T A D)(i T(@(t T)(c(+(w T)D)A)(+(w T)D))(r(c(+(w T)D)A
(d w(q((T)(-(e(h T)40)(e(h T)41
(d #(q((T A)(i A(#(c 41 T)(- A 1))(r T
(d E(q((T)(max(@ T()0
(d p(q((L A)(i A(p(c 32 L)(- A 1))L
(d n(q((D T A)(i(l 0(h D))(n(t D)(t T)(c(h T)A))(r(c(h T)A
(d N(q((T)(n(@ T()0)T(
(d H(q((T A)(i T(i(*(e(h T)40)(e(cadr T)41))(H(t(t T))(c(q(40 41))A))(H(t T)(c(h T)A)))(r A
(d W(q((L)(i(c()L)L(c L(
(d :(q((I L)(i I(:(- I 1)(t L))L
(d ;(q((L)(:(s(last-index(r(@(r L)()0))1)1)L
(d }(q((D T)(i(l 0(h D))(}(t D)(t T))(t T
(d f(q((T S P)(i T(f(t T)(concat S(i(e(h T)41)(c 41())(i(+(l(E(N(concat(; P)T)))3)(*(not((q((T)(}(@ T()0)T)))(t(; P))))(l(E(N T))2)))(p(W(h T))(- 1(?(q(()40))(O P))))(i(e(O P)40)(W(h T))(c 10(p(W(h T))(O(@ P()0))))))))(insert-end(h T)P))S
(d F(q((S)(string(f(H((q((T)(#(r T)(last(@ T()0)))))(_(chars S)()()))())()(
This took several hours days, but now it actually works.
This emits a couple of errors, which I think are unavoidable, and now a lot more which are avoidable.
-18 thanks to DLosc.
Because tinylisp doesn't have string literals, you'll need to pass your input through this python script.
I only sorta understand this, but here's the basic algorithm the code follows:
Convert the string to a list of characters for easier manipulation.
Parse the string into a list of tokens
- Parentheses are left on their own, other tokens are grouped into lists.
Iterate through the string, keeping a list of the previous tokens:
- If it's a
), immediately append it. Else: - If:
- Backtrack through the previous tokens to the start of the parent expression.
- Use this index to get the depth of the parent expression, is it less than 3?
- or:
- Backtrack to the parent expression
- Remove the first parenthesis and remove the first expression after that.
- Is it empty?
- and:
- is the current expression depth less than 2?
- If so, this token should go on the same line, possibly with a space.
- Else, it should be on a new line - get the depth of the current expression and indent by that.
- If it's a
Finally, return the string.
Python 3, 388 bytes
Basically a golfed version of the reference solution @DLosc gave
import re
k=" "
def p(e,*t,l=0):
for n in e:
if")"==n:break
if")">n:z=p(e);l=max(l,z[0]+1);t+=z,
else:l=max(l,1);t+=n,
return[l,*t]
t=p(iter(re.findall("[^() ]+|\S",input())[1:]))
def f(t,d=k):
if[*t]==t:
r="(";s=[f(i,d+k)for i in t[1:]]
if t[0]>2:
if len(t)>2and""in t[2]or t[2][0]<2:r+=s[0]+k;s=s[1:]
q="\n"+d
else:q=k
return r+q.join(s)+")"
return t
print(f(t))
The regex used in the code [^() ]+|\S is the a shorter way of writing the regex [()]|[^() ]+ given in the question by 2 bytes
[*t]==t is a sneaky trick to cut back 6 bytes instead of type(t)==list
""in t[2] is another trick to cut down 7 bytes instead of type(t[2])!=list
-3 thx to @pxeger
-1 thx to @aidenchow
Retina 0.8.2, 330 bytes
^((\()|(?<-2>\))|[^()])+
$&$#2$*)
[^ (](?=\()|\)(?=[^) ])
$&
{%`^(( *)\(+?([^() ]+ |((\()|(?<-5>\))|[^()])+(?(5)^) )+)(\(([^() ]+ |\(\) )*\([^)])
$1¶$2 $6
¶
¶
%+`^(( *)\(([^() ]+|((\()|(?<-5>\))|[^()])+(?(5)^))( [^() ]+| \([^()]+\))+)( [^() ]+| \([^()]+\))+$
$1¶$2$7
%+`^(( *)\(([^() ]+|((\()|(?<-5>\))|[^()])+(?(5)^))\))
$1¶$2
Try it online! Link includes test cases. Explanation:
^((\()|(?<-2>\))|[^()])+
$&$#2$*)
Use a .NET balancing group to count how many extra (s there are and append that many trailing )s.
[^ (](?=\()|\)(?=[^) ])
$&
Insert spaces before (s and after )s where necessary.
{`
Repeat until no more transforms can be made.
%`^(( *)\(+?([^() ]+ |((\()|(?<-5>\))|[^()])+(?(5)^) )+)(\(([^() ]+ |\(\) )*\([^)])
$1¶$2 $6
¶
¶
Split lists of nesting level 3 or more.
%+`^(( *)\(([^() ]+|((\()|(?<-5>\))|[^()])+(?(5)^))( [^() ]+| \([^()]+\))+)( [^() ]+| \([^()]+\))+$
$1¶$2$7
For those lists, where there are only lists of nesting level 0 or 1 left, put all but the first on their own line. (We need to check explicitly to ensure that the list was in fact split in the first place.)
%+`^(( *)\(([^() ]+|((\()|(?<-5>\))|[^()])+(?(5)^))\))
$1¶$2
Lists of nesting level 0 or 1 after a list of nesting level 2 or more also need to go on their own line. (We know they can't have also nesting level 2 or more because they would have been split off earlier, so we don't actually need to check their nesting level.)
Charcoal, 166 bytes
⊞υ⟦⟧≔ωηF⁺θ×)⁻№θ(№θ)«F№( )ιF›ηω«⊞§υ±¹⟦η¹⟧≔ωη»≡ι ω(⊞υ⟦⟧)«≔∨⊟υ⟦⟦ω⁰⟧⟧ι≔∧⊖Lι›³§§ι¹±¹δ≔⌈Eι⊕⊟κζF›⁴ζ≔⊖Lιδ≔E§ι⁰⁺§ (¬λκεFδ⊞ε⁺⁺⊟ε ⊟§ι⊕κFΦι›λδFκ⊞ε⁺ λ⊞ε⁺⊟ε)⊞εζ⊞§υ±¹ε»≔⁺ηιη»✂⊟⊟υ⁰±¹
Try it online! Link is to verbose version of code. Explanation:
⊞υ⟦⟧
Add a dummy container to the predefined empty list to hold the final result.
≔ωη
Start off with no atom.
F⁺θ×)⁻№θ(№θ)«
Append enough trailing )s to balance the input and loop over its characters.
F№( )ιF›ηω«⊞§υ±¹⟦η¹⟧≔ωη»
If the current character is one of ( ) then push any current token to the most recent expression. (Note that my nesting levels are one greater than those used by the question, because the nesting level of () evaluates to 1.)
≡ι ω
If the current character is a space then do nothing more at this point.
(⊞υ⟦⟧
If the current character is a ( then start a new subexpression.
)«
If the current character is a ):
≔∨⊟υ⟦⟦ω⁰⟧⟧ι
Get the latest expression, or an empty token if there wasn't one. (Since this makes () contain an empty token with a nesting level of 0, its nesting level evaluates to 1.)
≔∧⊖Lι›³§§ι¹±¹δ
Calculate whether the second term should be concatenated to the end of the first.
≔⌈Eι⊕⊟κζ
Calculate the depth of this term.
F›⁴ζ≔⊖Lιδ
If this term is shallow enough then concatenate all the terms together.
≔E§ι⁰⁺§ (¬λκε
Indent the first term, except for the first line, which is prefixed with a leading ( instead.
Fδ⊞ε⁺⁺⊟ε ⊟§ι⊕κ
Concatenate the desired number of terms to the first term.
FΦι›λδFκ⊞ε⁺ λ
Indent and append any remaining terms.
⊞ε⁺⊟ε)
Append a final ) to the last line.
⊞εζ
Append the nesting level to this term.
⊞§υ±¹ε
Append this term to its parent expression.
»≔⁺ηιη
For any other character, append it to the current token.
»✂⊟⊟υ⁰±¹
Output the pretty-printed expression, excluding its nesting depth.
Python3, 629 bytes:
import re
d=lambda x,c=0:c if type(x)!=list or not x else max(d(i,c+1)for i in x)
def p(s):
r=[];S=str.lstrip
while s:
if(c:=s[0])==')':s=S(s[1:]);break
if c=='(':s=(l:=p(s[1:]))[1];r+=[l[0]]
else:r+=[l:=re.findall('[^() ]+',s)[0]];s=S(s[len(l):])
return r,S(s)
def f(r,t=0):
j=lambda x:x if type(x)!=list else'()'
k,s=[],0
if all(d(i)<2 for i in r):k=[f(i,t+1)if d(i)else j(i)for i in r]
else:
for i,a in enumerate(r):
D=d(a);F=f(a,t+1)if D else j(a)
if not i:k+=[F]
elif i==1 and D<2:k+=[F]
else:k+=['\n'+' '*(t+1)+F]
return'('+' '.join(k)+')'
g=lambda x:f(p(x+')'*((c:=x.count)('(')-c(')')))[0][0])