In a bash script that I'm writing for Linux/Solaris I need to decode more than a hundred thousand base64-encoded text strings, and, because I don't wanna massively fork a non-portable base64 binary from awk, I wrote a function that does the decoding.
Here's the code of my base64_decode function:
function base64_decode(str, out,i,n,v) {
out = ""
if ( ! ("A" in _BASE64_DECODE_c2i) )
for (i = 1; i <= 64; i++)
_BASE64_DECODE_c2i[substr("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",i,1)] = i-1
i = 0
n = length(str)
while (i <= n) {
v = _BASE64_DECODE_c2i[substr(str,++i,1)] * 262144 + \
_BASE64_DECODE_c2i[substr(str,++i,1)] * 4096 + \
_BASE64_DECODE_c2i[substr(str,++i,1)] * 64 + \
_BASE64_DECODE_c2i[substr(str,++i,1)]
out = out sprintf("%c%c%c", int(v/65536), int(v/256), v)
}
return out
}
Which works fine:
printf '%s\n' SmFuZQ== amRvZQ== |
LANG=C command -p awk '
{ print base64_decode($0) }
function base64_decode(...) {...}
'
Jane
jdoe
SIMPLIFIED REAL-LIFE EXAMPLE THAT DOESN'T WORK AS EXPECTED
I want to get the givenName of the users that are members of GroupCode = 025496 from the output of ldapsearch -LLL -o ldif-wrap=no ... '(|(uid=*)(GroupCode=*))' uid givenName sn GroupCode memberUid:
dn: uid=jsmith,ou=users,dc=example,dc=com
givenName: John
sn: SMITH
uid: jsmith
dn: uid=jdoe,ou=users,dc=example,dc=com
uid: jdoe
givenName:: SmFuZQ==
sn:: RE9F
dn: cn=group1,ou=groups,dc=example,dc=com
GroupCode: 025496
memberUid:: amRvZQ==
memberUid: jsmith
Here would be an awk for doing so:
LANG=C command -p awk -F '\n' -v RS='' -v GroupCode=025496 '
{
delete attrs
for (i = 2; i <= NF; i++) {
match($i,/::? /)
key = substr($i,1,RSTART-1)
val = substr($i,RSTART+RLENGTH)
if (RLENGTH == 3)
val = base64_decode(val)
attrs[key] = ((key in attrs) ? attrs[key] SUBSEP val : val)
}
if ( /\nuid:/ )
givenName[ attrs["uid"] ] = attrs["givenName"]
else
memberUid[ attrs["GroupCode"] ] = attrs["memberUid"]
}
END {
n = split(memberUid[GroupCode],uid,SUBSEP)
for ( i = 1; i <= n; i++ )
print givenName[ uid[i] ]
}
function base64_decode(...) { ... }
'
On BSD and Solaris the result is:
Jane
John
While on Linux it is:
John
I don't know where the issue might be; is there something wrong with the base64_decode function and/or the code that uses it?
Your function generates NUL bytes when its argument (encoded string) ends with padding characters (=s). Below is a corrected version of your while loop:
while (i < n) {
v = _BASE64_DECODE_c2i[substr(str,1+i,1)] * 262144 + \
_BASE64_DECODE_c2i[substr(str,2+i,1)] * 4096 + \
_BASE64_DECODE_c2i[substr(str,3+i,1)] * 64 + \
_BASE64_DECODE_c2i[substr(str,4+i,1)]
i += 4
if (v%256 != 0)
out = out sprintf("%c%c%c", int(v/65536), int(v/256), v)
else if (int(v/256)%256 != 0)
out = out sprintf("%c%c", int(v/65536), int(v/256))
else
out = out sprintf("%c", int(v/65536))
}
Note that if the decoded bytes contains an embedded NUL then this approach may not work properly.
Problem is within base64_decode function that outputs some junk characters on gnu-awk.
You can use this awk code that uses system provided base64 utility as an alternative:
{
delete attrs
for (i = 2; i <= NF; i++) {
match($i,/::? /)
key = substr($i,1,RSTART-1)
val = substr($i,RSTART+RLENGTH)
if (RLENGTH == 3) {
cmd = "echo " val " | base64 -di"
cmd | getline val # should also check exit code here
}
attrs[key] = ((key in attrs) ? attrs[key] SUBSEP val : val)
}
if ( /\nuid:/ )
givenName[ attrs["uid"] ] = attrs["givenName"]
else
memberUid[ attrs["GroupCode"] ] = attrs["memberUid"]
}
END {
n = split(memberUid[GroupCode],uid,SUBSEP)
for ( i = 1; i <= n; i++ )
print givenName[ uid[i] ]
}
I have tested this on gnu and BSD awk versions and I am getting expected output in all the cases.
If you cannot use external base64 utility then I suggest you take a look here for awk version of base64 decode.
This answer is for reference
Here's a working base64_decode function (thanks #MNejatAydin for pointing out the issue(s) in the original one):
function base64_decode(str, out,bits,n,i,c1,c2,c3,c4) {
out = ""
# One-time initialization during the first execution
if ( ! ("A" in _BASE64) )
for (i = 1; i <= 64; i++)
# The "_BASE64" array associates a character to its base64 index
_BASE64[substr("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",i,1)] = i-1
# Decoding the input string
n = length(str)
i = 0
while ( i < n ) {
c1 = substr(str, ++i, 1)
c2 = substr(str, ++i, 1)
c3 = substr(str, ++i, 1)
c4 = substr(str, ++i, 1)
bits = _BASE64[c1] * 262144 + _BASE64[c2] * 4096 + _BASE64[c3] * 64 + _BASE64[c4]
if ( c4 != "=" )
out = out sprintf("%c%c%c", bits/65536, bits/256, bits)
else if ( c3 != "=" )
out = out sprintf("%c%c", bits/65536, bits/256)
else
out = out sprintf("%c", bits/65536)
}
return out
}
WARNING: the function requires LANG=C
It also doesn't check that the input is a valid base64 string; for that you can add a simple condition like:
match( str, "^([a-zA-Z/-9+]{4})*([a-zA-Z/-9+]{2}[a-zA-Z/-9+=]{2})?$" )
Interestingly, the code is 2x faster than base64decode.awk, but it's only 3x faster than forking the base64 binary from inside awk.
notes:
In a base64 encoded string, 4 bytes represent 3 bytes of data; the input have to be processed by groups of 4 characters.
Multiplying and dividing an integer by a power of two is equivalent to do bitwise left and right shifts operations.
262144 is 2^18, so N * 262144 is equivalent to N << 18
4096 is 2^12, so N * 4096 is equivalent to N << 12
64 id 2^6, so N * 4096 is equivalent to N << 6
65536 is 2^16, so N / 65536 (integer division) is equivalent to N >> 16
256 is 2^8, so N / 256 (integer division) is equivalent to N >> 8
What happens in printf "%c", N:
N is first converted to an integer (if need be) and then, WITH LANG=C, the 8 least significant bits are taken in for the %c formatting.
How the possible padding of one or two trailing = characters at the end of the encoded string is handled:
If the 4th char isn't = (i.e. there's no padding) then the result should be 3 bytes of data.
If the 4th char is = and the 3rd char isn't = then there's 2 bytes of of data to decode.
If the fourth char is = and the third char is = then there's only one byte of data.
How to use an array of numbers in a for loop with awk ?
I tried:
awk '{ for (i in [10, 12, 18]) print i }' myfile.txt
But I'm getting a syntax error.
The in operator works on arrays. The way to create an array from a list of numbers like 10 12 18 is to split() a string containing those numbers.
To have those numbers stored as values in an array a[] with indices 1 to 3:
awk 'BEGIN{FS=OFS="|"; split("10 12 18",a," ")}
(FNR>2) { for(j in a) { i=a[j]; k=$i OFS $(i+1); c[k]++; d[k] = i } }
END{for (k in c) print d[k],k,c[k] }' myfile.txt
To have those numbers stored as indices of an array b[] with all values 0-or-null (same as an uninitialized scalar variable):
awk 'BEGIN{FS=OFS="|"; split("10 12 18",a," "); for (j in a) b[a[j]]}
(FNR>2) { for(i in b) { k=$i OFS $(i+1); c[k]++; d[k] = i } }
END{for (k in c) print d[k],k,c[k] }' myfile.txt
If you didn't want to create the array once up front for some reason (e.g. the list of numbers you want to split is created dynamically) then you could create it every time you need it, e.g.:
awk 'BEGIN{FS=OFS="|"}
(FNR>2) { split("10 12 18",a," "); for(j in a) { i=a[j]; k=$i OFS $(i+1); c[k]++; d[k] = i } }
END{for (k in c) print d[k],k,c[k] }' myfile.txt
but obviously creating the same array multiple times is less efficient than creating it once.
kinda made a very rough emulation of a for-loop that directly takes in a list without needing an extra function call prior to that to initialize it :
it tries to be as flexible as possible regarding what the
delimiter(s) might be, so
foreach("[CA=MX=JP=FR=SG=AUS=N.Z.]")
would actually also work.
Despite being shown with the gawk profile below,
and using the PROCINFO array, you don't need gawk for it to work :
it's functional on mawk 1.3.4, mawk 1.9.9.6, gnu gawk 5.1.1, and macos x
just added a Unicode UTF8 feature, which works regardless of what your locale setting is, or whether you're using gawk mawk or nawk
emojis work fine too
that said, it cannot properly parse CSV, XML, or JSON inputs
(didn't have the time to make it that fancy)
list 1 :: 10
list 1 :: 12
list 1 :: 18.
list 1 :: 27
list 1 :: 36
list 1 :: pi
list 1 :: hubble
list 1 :: kelvins
list 1 :: euler
list 1 :: higgs
list 1 :: 9.6
list 2 :: CA
list 2 :: MX
list 2 :: JP
list 2 :: FR
list 2 :: SG
list 2 :: AUS
list 2 :: N.Z.
# gawk profile, created Mon May 9 22:06:03 2022
# BEGIN rule(s)
BEGIN {
11 while (i = foreach("[10, 12, 18., 27, 36, pi, hubble, kelvins, euler, higgs, 9.6]")) {
11 print "list 1 :: ", i
}
1 printf ("\n\n\n")
7 while (i = foreach("[CA, MX, JP, FR, SG, AUS, N.Z., ]")) {
7 print "list 2 :: ", i
}
}
# Functions, listed alphabetically
20 function foreach(_, __)
{
20 if (_=="") {
return \
PROCINFO["foreach", "_"] = \
PROCINFO["foreach", "#"] = _
}
20 __ = "\032\016\024"
20 if (_!= PROCINFO["foreach", "_"]) { # 2
2
PROCINFO["foreach","_"]=_
2 gsub("^[ \t]*[[<{(][ \t]*"\
"|[ \t]*[]>})][ \t]*$"\
"|\\300|\\301","",_)
gsub("[^"(\
"\333\222"~"[^\333\222]"\
? "\\200-\\277"\
"\\302-\\364"\
: "" \
)"[:alnum:]"\
\
"\302\200""""-\337\277" \
"\340\240\200-\355\237\277" \
"\356\200\200-\357\277\277" \
"\360\220\200\200-\364\217\277\277"\
\
".\42\47#$&%+-]+",__,_)
gsub("^"(__)"|"\
(__)"$","", _)
2 PROCINFO["foreach","#"]=_
}
20 if ((_=PROCINFO["foreach","#"])=="") { # 2
2 return _
}
18 sub((__) ".*$", "", _)
sub("^[^"(__)"]+("(__)")?","",PROCINFO["foreach","#"])
18 return _
}
list 2 :: CA
list 2 :: MX
list 2 :: JP
list 2 :: FR
list 2 :: SG
list 2 :: 눷
list 2 :: 🤡
list 2 :: N.Z.
while(i = foreach("[CA=MX=JP=FR=SG=\353\210\267"\
"=\360\237\244\241=N.Z.]")) {
print "list 2 :: ", i
}
I am trying to transpose a really long file and I am concerned that it will not be transposed entirely.
My data looks something like this:
Thisisalongstring12345678 1 AB abc 937 4.320194
Thisisalongstring12345678 1 AB efg 549 0.767828
Thisisalongstring12345678 1 AB hi 346 -4.903441
Thisisalongstring12345678 1 AB jk 193 7.317946
I want my data to look like this:
Thisisalongstring12345678 Thisisalongstring12345678 Thisisalongstring12345678 Thisisalongstring12345678
1 1 1 1
AB AB AB AB
abc efg hi jk
937 549 346 193
4.320194 0.767828 -4.903441 7.317946
Would the length of the first string prove to be an issue? My file is much longer than this approx 2000 lines long. Also is it possible to change the name of the first string to Thisis234, and then transpose?
I don't see why it will not be - unless you don't have enough memory. Try the below and see if you run into problems.
Input:
$ cat inf.txt
a b c d
1 2 3 4
. , + -
A B C D
Awk program:
$ cat mkt.sh
awk '
{
for(c = 1; c <= NF; c++) {
a[c, NR] = $c
}
if(max_nf < NF) {
max_nf = NF
}
}
END {
for(r = 1; r <= NR; r++) {
for(c = 1; c <= max_nf; c++) {
printf("%s ", a[r, c])
}
print ""
}
}
' inf.txt
Run:
$ ./mkt.sh
a 1 . A
b 2 , B
c 3 + C
d 4 - D
Credits:
http://www.chemie.fu-berlin.de/chemnet/use/info/gawk/gawk_12.html#SEC121
Hope this helps.
This can be done with the rs BSD command:
http://www.unix.com/man-page/freebsd/1/rs/
Check out the -T option.
I tried icyrock.com's answer, but found that I had to change:
for(r = 1; r <= NR; r++) {
for(c = 1; c <= max_nf; c++) {
to
for(r = 1; r <= max_nf; r++) {
for(c = 1; c <= NR; c++) {
to get the NR columns and max_nf rows. So icyrock's code becomes:
$ cat mkt.sh
awk '
{
for(c = 1; c <= NF; c++) {
a[c, NR] = $c
}
if(max_nf < NF) {
max_nf = NF
}
}
END {
for(r = 1; r <= max_nf; r++) {
for(c = 1; c <= NR; c++) {
printf("%s ", a[r, c])
}
print ""
}
}
' inf.txt
If you don't do that and use an asymmetrical input, like:
a b c d
1 2 3 4
. , + -
You get:
a 1 .
b 2 ,
c 3 +
i.e. still 3 rows and 4 columns (the last of which is blank).
For # ScubaFishi and # icyrock code:
"if (max_nf < NF)" seems unnecessary. I deleted it, and the code works just fine.
Locked. This question and its answers are locked because the question is off-topic but has historical significance. It is not currently accepting new answers or interactions.
I made the ultimate laugh generator using these rules. Can you implement it in your favorite language in a clever manner?
Rules:
On every iteration, the following transformations occur.
H -> AH
A -> HA
AA -> HA
HH -> AH
AAH -> HA
HAA -> AH
n = 0 | H
n = 1 | AH
n = 2 | HAAH
n = 3 | AHAH
n = 4 | HAAHHAAH
n = 5 | AHAHHA
n = 6 | HAAHHAAHHA
n = 7 | AHAHHAAHHA
n = 8 | HAAHHAAHHAAHHA
n = 9 | AHAHHAAHAHHA
n = ...
Lex/Flex
69 characters. In the text here, I changed tabs to 8 spaces so it would look right, but all those consecutive spaces should be tabs, and the tabs are important, so it comes out to 69 characters.
#include <stdio.h>
%%
HAA|HH|H printf("AH");
AAH|AA|A printf("HA");
For what it's worth, the generated lex.yy.c is 42736 characters, but I don't think that really counts. I can (and soon will) write a pure-C version that will be much shorter and do the same thing, but I feel that should probably be a separate entry.
EDIT:
Here's a more legit Lex/Flex entry (302 characters):
char*c,*t;
#define s(a) t=c?realloc(c,strlen(c)+3):calloc(3,1);if(t)c=t,strcat(c,#a);
%%
free(c);c=NULL;
HAA|HH|H s(AH)
AAH|AA|A s(HA)
%%
int main(void){c=calloc(2,1);if(!c)return 1;*c='H';for(int n=0;n<10;n++)printf("n = %d | %s\n",n,c),yy_scan_string(c),yylex();return 0;}int yywrap(){return 1;}
This does multiple iterations (unlike the last one, which only did one iteration, and had to be manually seeded each time, but produced the correct results) and has the advantage of being extremely horrific-looking code. I use a function macro, the stringizing operator, and two global variables. If you want an even messier version that doesn't even check for malloc() failure, it looks like this (282 characters):
char*c,*t;
#define s(a) t=c?realloc(c,strlen(c)+3):calloc(3,1);c=t;strcat(c,#a);
%%
free(c);c=NULL;
HAA|HH|H s(AH)
AAH|AA|A s(HA)
%%
int main(void){c=calloc(2,1);*c='H';for(int n=0;n<10;n++)printf("n = %d | %s\n",n,c),yy_scan_string(c),yylex();return 0;}int yywrap(){return 1;}
An even worse version could be concocted where c is an array on the stack, and we just give it a MAX_BUFFER_SIZE of some sort, but I feel that's taking this too far.
...Just kidding. 207 characters if we take the "99 characters will always be enough" mindset:
char c[99]="H";
%%
c[0]=0;
HAA|HH|H strcat(c, "AH");
AAH|AA|A strcat(c, "HA");
%%
int main(void){for(int n=0;n<10;n++)printf("n = %d | %s\n",n,c),yy_scan_string(c),yylex();return 0;}int yywrap(){return 1;}
My preference is for the one that works best (i.e. the first one that can iterate until memory runs out and checks its errors), but this is code golf.
To compile the first one, type:
flex golf.l
gcc -ll lex.yy.c
(If you have lex instead of flex, just change flex to lex. They should be compatible.)
To compile the others, type:
flex golf.l
gcc -std=c99 lex.yy.c
Or else GCC will whine about ‘for’ loop initial declaration used outside C99 mode and other crap.
Pure C answer coming up.
MATLAB (v7.8.0):
73 characters (not including formatting characters used to make it look readable)
This script ("haha.m") assumes you have already defined the variable n:
s = 'H';
for i = 1:n,
s = regexprep(s,'(H)(H|AA)?|(A)(AH)?','${[137-$1 $1]}');
end
...and here's the one-line version:
s='H';for i=1:n,s = regexprep(s,'(H)(H|AA)?|(A)(AH)?','${[137-$1 $1]}');end
Test:
>> for n=0:10, haha; disp([num2str(n) ': ' s]); end
0: H
1: AH
2: HAAH
3: AHAH
4: HAAHHAAH
5: AHAHHA
6: HAAHHAAHHA
7: AHAHHAAHHA
8: HAAHHAAHHAAHHA
9: AHAHHAAHAHHA
10: HAAHHAAHHAHAAHHA
A simple translation to Haskell:
grammar = iterate step
where
step ('H':'A':'A':xs) = 'A':'H':step xs
step ('A':'A':'H':xs) = 'H':'A':step xs
step ('A':'A':xs) = 'H':'A':step xs
step ('H':'H':xs) = 'A':'H':step xs
step ('H':xs) = 'A':'H':step xs
step ('A':xs) = 'H':'A':step xs
step [] = []
And a shorter version (122 chars, optimized down to three derivation rules + base case):
grammar=iterate s where{i 'H'='A';i 'A'='H';s(n:'A':m:x)|n/=m=m:n:s x;s(n:m:x)|n==m=(i n):n:s x;s(n:x)=(i n):n:s x;s[]=[]}
And a translation to C++ (182 chars, only does one iteration, invoke with initial state on the command line):
#include<cstdio>
#define o putchar
int main(int,char**v){char*p=v[1];while(*p){p[1]==65&&~*p&p[2]?o(p[2]),o(*p),p+=3:*p==p[1]?o(137-*p++),o(*p++),p:(o(137-*p),o(*p++),p);}return 0;}
Javascript:
120 stripping whitespace and I'm leaving it alone now!
function f(n,s){s='H';while(n--){s=s.replace(/HAA|AAH|HH?|AA?/g,function(a){return a.match(/^H/)?'AH':'HA'});};return s}
Expanded:
function f(n,s)
{
s = 'H';
while (n--)
{
s = s.replace(/HAA|AAH|HH?|AA?/g, function(a) { return a.match(/^H/) ? 'AH' : 'HA' } );
};
return s
}
that replacer is expensive!
Here's a C# example, coming in at 321 bytes if I reduce whitespace to one space between each item.
Edit: In response to #Johannes Rössel comment, I removed generics from the solution to eek out a few more bytes.
Edit: Another change, got rid of all temporary variables.
public static String E(String i)
{
return new Regex("HAA|AAH|HH|AA|A|H").Replace(i,
m => (String)new Hashtable {
{ "H", "AH" },
{ "A", "HA" },
{ "AA", "HA" },
{ "HH", "AH" },
{ "AAH", "HA" },
{ "HAA", "AH" }
}[m.Value]);
}
The rewritten solution with less whitespace, that still compiles, is 158 characters:
return new Regex("HAA|AAH|HH|AA|A|H").Replace(i,m =>(String)new Hashtable{{"H","AH"},{"A","HA"},{"AA","HA"},{"HH","AH"},{"AAH","HA"},{"HAA","AH"}}[m.Value]);
For a complete source code solution for Visual Studio 2008, a subversion repository with the necessary code, including unit tests, is available below.
Repository is here, username and password are both 'guest', without the quotes.
Ruby
This code golf is not very well specified -- I assumed that function returning n-th iteration string is best way to solve it. It has 80 characters.
def f n
a='h'
n.times{a.gsub!(/(h(h|aa)?)|(a(ah?)?)/){$1.nil?? "ha":"ah"}}
a
end
Code printing out n first strings (71 characters):
a='h';n.times{puts a.gsub!(/(h(h|aa)?)|(a(ah?)?)/){$1.nil?? "ha":"ah"}}
Erlang
241 bytes and ready to run:
> erl -noshell -s g i -s init stop
AHAHHAAHAHHA
-module(g).
-export([i/0]).
c("HAA"++T)->"AH"++c(T);
c("AAH"++T)->"HA"++c(T);
c("HH"++T)->"AH"++c(T);
c("AA"++T)->"HA"++c(T);
c("A"++T)->"HA"++c(T);
c("H"++T)->"AH"++c(T);
c([])->[].
i(0,L)->L;
i(N,L)->i(N-1,c(L)).
i()->io:format(i(9,"H"))
Could probably be improved.
Perl 168 characters.
(not counting unnecessary newlines)
perl -E'
($s,%m)=qw[H H AH A HA AA HA HH AH AAH HA HAA AH];
sub p{say qq[n = $_[0] | $_[1]]};p(0,$s);
for(1..9){$s=~s/(H(AA|H)?|A(AH?)?)/$m{$1}/g;p($_,$s)}
say q[n = ...]'
De-obfuscated:
use strict;
use warnings;
use 5.010;
my $str = 'H';
my %map = (
H => 'AH',
A => 'HA',
AA => 'HA',
HH => 'AH',
AAH => 'HA',
HAA => 'AH'
);
sub prn{
my( $n, $str ) = #_;
say "n = $n | $str"
}
prn( 0, $str );
for my $i ( 1..9 ){
$str =~ s(
(
H(?:AA|H)? # HAA | HH | H
|
A(?:AH?)? # AAH | AA | A
)
){
$map{$1}
}xge;
prn( $i, $str );
}
say 'n = ...';
Perl 150 characters.
(not counting unnecessary newlines)
perl -E'
$s="H";
sub p{say qq[n = $_[0] | $_[1]]};p(0,$s);
for(1..9){$s=~s/(?|(H)(?:AA|H)?|(A)(?:AH?)?)/("H"eq$1?"A":"H").$1/eg;p($_,$s)}
say q[n = ...]'
De-obfuscated
#! /usr/bin/env perl
use strict;
use warnings;
use 5.010;
my $str = 'H';
sub prn{
my( $n, $str ) = #_;
say "n = $n | $str"
}
prn( 0, $str );
for my $i ( 1..9 ){
$str =~ s{(?|
(H)(?:AA|H)? # HAA | HH | H
|
(A)(?:AH?)? # AAH | AA | A
)}{
( 'H' eq $1 ?'A' :'H' ).$1
}egx;
prn( $i, $str );
}
say 'n = ...';
Python (150 bytes)
import re
N = 10
s = "H"
for n in range(N):
print "n = %d |"% n, s
s = re.sub("(HAA|HH|H)|AAH|AA|A", lambda m: m.group(1) and "AH" or "HA",s)
Output
n = 0 | H
n = 1 | AH
n = 2 | HAAH
n = 3 | AHAH
n = 4 | HAAHHAAH
n = 5 | AHAHHA
n = 6 | HAAHHAAHHA
n = 7 | AHAHHAAHHA
n = 8 | HAAHHAAHHAAHHA
n = 9 | AHAHHAAHAHHA
Here is a very simple C++ version:
#include <iostream>
#include <sstream>
using namespace std;
#define LINES 10
#define put(t) s << t; cout << t
#define r1(o,a,c0) \
if(c[0]==c0) {put(o); s.unget(); s.unget(); a; continue;}
#define r2(o,a,c0,c1) \
if(c[0]==c0 && c[1]==c1) {put(o); s.unget(); a; continue;}
#define r3(o,a,c0,c1,c2) \
if(c[0]==c0 && c[1]==c1 && c[2]==c2) {put(o); a; continue;}
int main() {
char c[3];
stringstream s;
put("H\n\n");
for(int i=2;i<LINES*2;) {
s.read(c,3);
r3("AH",,'H','A','A');
r3("HA",,'A','A','H');
r2("AH",,'H','H');
r2("HA",,'A','A');
r1("HA",,'A');
r1("AH",,'H');
r1("\n",i++,'\n');
}
}
It's not exactly code-golf (it could be made a lot shorter), but it works. Change LINES to however many lines you want printed (note: it will not work for 0). It will print output like this:
H
AH
HAAH
AHAH
HAAHHAAH
AHAHHA
HAAHHAAHHA
AHAHHAAHHA
HAAHHAAHHAAHHA
AHAHHAAHAHHA
ANSI C99
Coming in at a brutal 306 characters:
#include <stdio.h>
#include <string.h>
char s[99]="H",t[99]={0};int main(){for(int n=0;n<10;n++){int i=0,j=strlen(s);printf("n = %u | %s\n",n,s);strcpy(t,s);s[0]=0;for(;i<j;){if(t[i++]=='H'){t[i]=='H'?i++:t[i+1]=='A'?i+=2:1;strcat(s,"AH");}else{t[i]=='A'?i+=1+(t[i+1]=='H'):1;strcat(s,"HA");}}}return 0;}
There are too many nested ifs and conditional operators for me to effectively reduce this with macros. Believe me, I tried. Readable version:
#include <stdio.h>
#include <string.h>
char s[99] = "H", t[99] = {0};
int main()
{
for(int n = 0; n < 10; n++)
{
int i = 0, j = strlen(s);
printf("n = %u | %s\n", n, s);
strcpy(t, s);
s[0] = 0;
/*
* This was originally just a while() loop.
* I tried to make it shorter by making it a for() loop.
* I failed.
* I kept the for() loop because it looked uglier than a while() loop.
* This is code golf.
*/
for(;i<j;)
{
if(t[i++] == 'H' )
{
// t[i] == 'H' ? i++ : t[i+1] == 'A' ? i+=2 : 1;
// Oh, ternary ?:, how do I love thee?
if(t[i] == 'H')
i++;
else if(t[i+1] == 'A')
i+= 2;
strcat(s, "AH");
}
else
{
// t[i] == 'A' ? i += 1 + (t[i + 1] == 'H') : 1;
if(t[i] == 'A')
if(t[++i] == 'H')
i++;
strcat(s, "HA");
}
}
}
return 0;
}
I may be able to make a shorter version with strncmp() in the future, but who knows? We'll see what happens.
In python:
def l(s):
H=['HAA','HH','H','AAH','AA','A']
L=['AH']*3+['HA']*3
for i in [3,2,1]:
if s[:i] in H: return L[H.index(s[:i])]+l(s[i:])
return s
def a(n,s='H'):
return s*(n<1)or a(n-1,l(s))
for i in xrange(0,10):
print '%d: %s'%(i,a(i))
First attempt: 198 char of code, I'm sure it can get smaller :D
REBOL, 150 characters. Unfortunately REBOL is not a language conducive to code golf, but 150 characters ain't too shabby, as Adam Sandler says.
This assumes the loop variable m has already been defined.
s: "H" r: "" z:[some[["HAA"|"HH"|"H"](append r "AH")|["AAH"|"AA"|"A"](append r "HA")]to end]repeat n m[clear r parse s z print["n =" n "|" s: copy r]]
And here it is with better layout:
s: "H"
r: ""
z: [
some [
[ "HAA" | "HH" | "H" ] (append r "AH")
| [ "AAH" | "AA" | "A" ] (append r "HA")
]
to end
]
repeat n m [
clear r
parse s z
print ["n =" n "|" s: copy r]
]
F#: 184 chars
Seems to map pretty cleanly to F#:
type grammar = H | A
let rec laugh = function
| 0,l -> l
| n,l ->
let rec loop = function
|H::A::A::x|H::H::x|H::x->A::H::loop x
|A::A::H::x|A::A::x|A::x->H::A::loop x
|x->x
laugh(n-1,loop l)
Here's a run in fsi:
> [for a in 0 .. 9 -> a, laugh(a, [H])] |> Seq.iter (fun (a, b) -> printfn "n = %i: %A" a b);;
n = 0: [H]
n = 1: [A; H]
n = 2: [H; A; A; H]
n = 3: [A; H; A; H]
n = 4: [H; A; A; H; H; A; A; H]
n = 5: [A; H; A; H; H; A]
n = 6: [H; A; A; H; H; A; A; H; H; A]
n = 7: [A; H; A; H; H; A; A; H; H; A]
n = 8: [H; A; A; H; H; A; A; H; H; A; A; H; H; A]
n = 9: [A; H; A; H; H; A; A; H; A; H; H; A]